hsien chen committed on
Commit
ce830c4
1 Parent(s): 98f3187
Files changed (2) hide show
  1. app.py +122 -10
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,16 +1,128 @@
1
- from transformers import pipeline
2
  import gradio as gr
 
 
 
 
3
 
 
4
 
5
- model = pipeline(
6
- "summarization",
7
- )
8
 
9
- def predict(prompt):
10
- summary = model(prompt)[0]["summary_text"]
11
- return summary
12
 
 
13
 
14
- # create an interface for the model
15
- with gr.Interface(predict, "textbox", "text") as interface:
16
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PIL.Image
2
  import gradio as gr
3
+ import base64
4
+ import time
5
+ import os
6
+ import google.generativeai as genai
7
 
8
+ import pathlib
9
 
10
# Configure the SDK before any model object is created, so the credentials
# are in place by the time the models below are constructed and used.
# `os` is already imported at the top of the file; the redundant mid-file
# `import os` that used to sit here has been dropped.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# Text-only model, used when no image is supplied.
txt_model = genai.GenerativeModel('gemini-pro')
# Multimodal model, used when an image accompanies the prompt.
vis_model = genai.GenerativeModel('gemini-pro-vision')
18
+
19
+ # Image to Base 64 Converter
20
def image_to_base64(image_path):
    """Return the contents of the file at *image_path* as Base64 text.

    Args:
        image_path: Path of the file to read. The file is opened in
            binary mode and read in full.

    Returns:
        The Base64 encoding of the file's bytes, as a ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, 'rb') as img:
        encoded_string = base64.b64encode(img.read())
    return encoded_string.decode('utf-8')
24
+
25
+ # Function that takes User Inputs and displays it on ChatUI
26
def query_message(history, txt, img):
    """Append the user's turn to the chat history and return the history.

    Args:
        history: List of ``(user, bot)`` message tuples. ``None`` is
            treated as an empty history.
        txt: The user's prompt text.
        img: Path to an image file, or a falsy value when no image was
            supplied.

    Returns:
        The same history list with one ``(message, None)`` entry appended.
        When an image is given, the message is the prompt followed by a
        Markdown image whose source is a Base64 data URL of the file.
    """
    import mimetypes

    if history is None:
        history = []
    if not img:
        history.append((txt, None))
        return history
    # Named `encoded` rather than `base64` so the module is not shadowed.
    encoded = image_to_base64(img)
    # Derive the MIME type from the file name instead of always claiming
    # JPEG; fall back to the previous hard-coded value when it is unknown.
    mime_type = mimetypes.guess_type(img)[0] or "image/jpeg"
    data_url = f"data:{mime_type};base64,{encoded}"
    history.append((f"{txt} ![]({data_url})", None))
    return history
34
+
35
+ # Function that takes User Inputs, generates Response and displays on Chat UI
36
def llm_response(history, text, img=None):
    """Generate a model reply for *text* and append it to the chat history.

    Args:
        history: List of ``(user, bot)`` message tuples. ``None`` is
            treated as an empty history.
        text: The prompt sent to the model.
        img: Optional path to an image file. When falsy, the text-only
            model is used; otherwise the image is opened with PIL and sent
            to the vision model together with the prompt. Defaults to
            ``None`` so the function can also be wired with only
            ``(history, text)`` inputs.

    Returns:
        The same history list with one ``(None, reply_text)`` entry
        appended.
    """
    if history is None:
        history = []
    if not img:
        response = txt_model.generate_content(text)
    else:
        # Context manager so the image file handle is closed once the
        # request has been issued.
        with PIL.Image.open(img) as image:
            response = vis_model.generate_content([text, image])
    history.append((None, response.text))
    return history
47
+
48
+ # Function that takes User Inputs and displays it on ChatUI
49
# Fixed prompt used by the single-button "Check This" flow.
text_box_01 = "what is in the image"


def output_query_message(img):
    """Return the prompt text to show while the model reply is pending.

    The result is wired to a ``gr.Textbox``. Previously the image branch
    returned a chatbot-style list of tuples with an embedded Base64 data
    URL, while the no-image branch returned a plain string. A Textbox
    cannot render that structure, so both cases now return the prompt
    string.

    Args:
        img: Path to the selected image, or a falsy value. Kept for
            interface compatibility; it does not affect the result.

    Returns:
        The fixed prompt ``text_box_01``.
    """
    return text_box_01
57
+
58
+ # Function that takes User Inputs, generates Response and displays on Chat UI
59
def output_llm_response(img):
    """Ask the model the fixed prompt ``text_box_01`` and return its reply.

    Args:
        img: Path to an image file, or a falsy value. Without an image the
            text-only model is queried; with one, the image is opened with
            PIL and sent to the vision model together with the prompt.

    Returns:
        The text of the model's response.
    """
    if not img:
        response = txt_model.generate_content(text_box_01)
        return response.text

    # Context manager so the image file handle is closed once the request
    # has been issued.
    with PIL.Image.open(img) as image:
        response = vis_model.generate_content([text_box_01, image])
    return response.text
68
+
69
+
70
+ # Interface Code- Selector method
71
+
72
def sentence_builder(animal, place):
    """Build a counting question about *animal* at *place*.

    The animal name is pluralised by simply appending ``"s"``.

    Args:
        animal: Singular animal name inserted into the question.
        place: Place name inserted into the question.

    Returns:
        The question string.
    """
    return f"""how many {animal}s from the {place} are shown in the picture?"""
74
+
75
+ # gradio block
76
+
77
# Tab 1: single-button check that asks the fixed prompt about an image.
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed -- confirm which components belong inside the Column.
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
    with gr.Column():
        outputbox = gr.Textbox(label="line clearance...")
        image_box = gr.Image(type="filepath")

        btn = gr.Button("Check This")
        # First show the prompt in the output box, then replace it with
        # the model's answer.
        clicked = btn.click(
            output_query_message,
            [image_box],
            outputbox,
        ).then(
            output_llm_response,
            [image_box],
            outputbox,
        )
    gr.Markdown("""
    ## SOP-302: Line Clearance ##

    <h5 align="center"><i>"XXXX here here."</i></h5>

    Multimodal-CoT incorporates vision features in a decoupled training framework. The framework consists of two training stages: (i) rationale generation and (ii) answer inference. Both stages share the same model architecture but differ in the input and output.
    """)
97
+
98
# Tab 2: chat-style view where the user picks a canned question about an image.
# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed -- confirm which components belong inside the Row.
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app2:
    gr.Markdown("## MM 2BB ##")
    with gr.Row():
        image_box = gr.Image(type="filepath")

        chatbot = gr.Chatbot(
            scale=2,
            height=750,
        )
    text_box = gr.Dropdown(
        [
            "what is in the image",
            "provide alternative title for the image",
            "how many birds can be seen in the picture?",
        ],
        label="Select--",
        info="Will add more animals later!",
    )

    btn = gr.Button("Submit")
    # Step 1 echoes the user's turn into the chat; step 2 appends the reply.
    # llm_response takes (history, text, img), so image_box must be passed
    # in the second step as well. Without it the call has too few arguments
    # and the image never reaches the vision model.
    clicked = btn.click(
        query_message,
        [chatbot, text_box, image_box],
        chatbot,
    ).then(
        llm_response,
        [chatbot, text_box, image_box],
        chatbot,
    )
123
# Top-level app: a heading plus the two sub-apps presented as tabs.
with gr.Blocks(theme='snehilsanyal/scikit-learn') as demo:
    gr.Markdown("## SOP Camera ##")
    gr.TabbedInterface([app1, app2], ["Check #1", "Check #2"])

# Enable request queuing, then start the server.
demo.queue()
demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  gradio
2
  transformers
3
- tensorflow
 
 
 
1
  gradio
2
  transformers
3
+ tensorflow
4
+ google-generativeai
5
+ Pillow