hsienchen committed
Commit eb46552
1 Parent(s): 9720f9c

Update app.py

Files changed (1):
  1. app.py +2 -56
app.py CHANGED
@@ -25,30 +25,6 @@ def image_to_base64(image_path):
     encoded_string = base64.b64encode(img.read())
     return encoded_string.decode('utf-8')
 
-# Function that takes User Inputs and displays it on ChatUI
-def query_message(history,txt,img):
-    if not img:
-        history += [(txt,None)]
-        return history
-    base64 = image_to_base64(img)
-    data_url = f"data:image/jpeg;base64,{base64}"
-    history += [(f"{txt} ![]({data_url})", None)]
-    return history
-
-# Function that takes User Inputs, generates Response and displays on Chat UI
-def llm_response(history,text,img):
-    if not img:
-        response = txt_model.generate_content(text)
-        history += [(None,response.text)]
-        return history
-
-    else:
-        img = PIL.Image.open(img)
-        response = vis_model.generate_content([text,img])
-        history += [(None,response.text)]
-        return history
-
-# Function that takes User Inputs and displays it on ChatUI
 
 def output_query_message(img):
     if not img:
@@ -69,13 +45,7 @@ def output_llm_response(img):
     response = vis_model.generate_content([txt_prompt_1,img])
     return response.text
 
-
-# Interface Code- Selector method
-
-def sentence_builder(animal, place):
-    return f"""how many {animal}s from the {place} are shown in the picture?"""
 
-# gradio block
 
 with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
     with gr.Column():
@@ -98,34 +68,10 @@ with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
     Multimodal-CoT incorporates vision features in a decoupled training framework. The framework consists of two training stages: (i) rationale generation and (ii) answer inference. Both stages share the same model architecture but differ in the input and output.
     """)
 
-with gr.Blocks(theme='snehilsanyal/scikit-learn') as app2:
-    gr.Markdown("## MM 2BB ##")
-    with gr.Row():
-        image_box = gr.Image(type="filepath")
-
-        chatbot = gr.Chatbot(
-            scale = 2,
-            height=750
-        )
-    text_box = gr.Dropdown(
-        ["what is in the image",
-         "provide alternative title for the image",
-         "how many birds can be seen in the picture?"],
-        label="Select--",
-        info="Will add more animals later!"
-    )
-
-    btn = gr.Button("Submit")
-    clicked = btn.click(query_message,
-                        [chatbot,text_box,image_box],
-                        chatbot
-                        ).then(llm_response,
-                               [chatbot,text_box],
-                               chatbot
-                               )
+
 with gr.Blocks(theme='snehilsanyal/scikit-learn') as demo:
     gr.Markdown("## SOP Camera ##")
-    gr.TabbedInterface([app1, app2], ["Check #1", "Check #2"])
+    gr.TabbedInterface([app1, app1], ["Check #1", "Check #2"])
 
 demo.queue()
 demo.launch()
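For reference, the deleted `query_message`/`llm_response` pair implemented a common Gradio multimodal-chat pattern: echo the user's turn into the `gr.Chatbot` history (embedding the uploaded image as a base64 data URL so it renders inline), then route the prompt to a text-only or a vision model depending on whether an image is attached. A minimal sketch of that pattern follows; the model setup is an assumption, since app.py defines `txt_model` and `vis_model` outside this diff (the `generate_content` calls suggest the google-generativeai SDK):

import base64

import google.generativeai as genai  # assumed backend, not shown in this diff
import PIL.Image

genai.configure(api_key="YOUR_API_KEY")                 # placeholder key
txt_model = genai.GenerativeModel("gemini-pro")         # assumed model names
vis_model = genai.GenerativeModel("gemini-pro-vision")

def image_to_base64(image_path):
    # Same helper as app.py: file bytes -> base64 text.
    with open(image_path, "rb") as img:
        return base64.b64encode(img.read()).decode("utf-8")

def query_message(history, txt, img):
    # Echo the user's turn; inline the image as a markdown data URL
    # so gr.Chatbot renders it in the chat window.
    if not img:
        return history + [(txt, None)]
    data_url = f"data:image/jpeg;base64,{image_to_base64(img)}"
    return history + [(f"{txt} ![]({data_url})", None)]

def llm_response(history, text, img):
    # Text-only prompt -> txt_model; prompt plus image -> vis_model.
    if not img:
        response = txt_model.generate_content(text)
    else:
        response = vis_model.generate_content([text, PIL.Image.open(img)])
    return history + [(None, response.text)]

Worth noting: the removed app2 wiring chained `.then(llm_response, [chatbot, text_box], chatbot)`, supplying two inputs to a three-parameter function, so the second tab would likely have raised on submit; the original also shadowed the `base64` module with a local variable. Deleting the tab removes both issues.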
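Net effect of the commit: only `app1` remains, and the same Blocks is passed twice to `gr.TabbedInterface`, so both tabs show the identical "Check" UI, presumably as a placeholder until a second check is rebuilt. A stripped-down sketch of the resulting wiring, with hypothetical stand-in content for app1's real UI:

import gradio as gr

# Stand-in for app1; the real one holds the SOP image check and the
# Multimodal-CoT notes shown in the diff context.
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
    gr.Markdown("Check #1 contents")

with gr.Blocks(theme='snehilsanyal/scikit-learn') as demo:
    gr.Markdown("## SOP Camera ##")
    # Post-commit wiring: the same Blocks fills both tabs. This mirrors
    # the committed code; whether Gradio renders one Blocks instance in
    # two tabs cleanly is version-dependent, not verified here.
    gr.TabbedInterface([app1, app1], ["Check #1", "Check #2"])

demo.queue()
demo.launch()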