praeclarumjj3 committed on
Commit
016e4dd
1 Parent(s): a97500b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -58
app.py CHANGED
@@ -80,48 +80,51 @@ def flag_last_response(state, model_selector, request: gr.Request):
80
  vote_last_response(state, "flag", model_selector, request)
81
  return ("",) + (disable_btn,) * 3
82
 
83
- def regenerate(state, image_process_mode, seg_process_mode):
84
  state.messages[-1][-1] = None
85
  prev_human_msg = state.messages[-2]
86
  if type(prev_human_msg[1]) in (tuple, list):
87
- prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode, prev_human_msg[1][3], seg_process_mode, None, None)
88
  state.skip_next = False
89
- return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
90
 
91
 
92
  def clear_history(request: gr.Request):
93
  state = default_conversation.copy()
94
- return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
95
 
96
 
97
  def add_text(state, text, image, image_process_mode, seg, seg_process_mode, depth, depth_process_mode, request: gr.Request):
98
  logger.info(f"add_text. len: {len(text)}")
99
  if len(text) <= 0 and image is None:
100
  state.skip_next = True
101
- return (state, state.to_gradio_chatbot(), "", None, None) + (no_change_btn,) * 5
102
  if args.moderate:
103
  flagged = violates_moderation(text)
104
  if flagged:
105
  state.skip_next = True
106
- return (state, state.to_gradio_chatbot(), moderation_msg, None, None) + (
107
  no_change_btn,) * 5
108
 
109
- text = text[:1576] # Hard cut-off
110
  if image is not None:
111
- text = text[:1200] # Hard cut-off for images
112
  if '<image>' not in text:
113
  text = '<image>\n' + text
114
  if seg is not None:
115
  if '<seg>' not in text:
116
  text = '<seg>\n' + text
 
 
 
117
 
118
- text = (text, image, image_process_mode, seg, seg_process_mode, None, None)
119
  if len(state.get_images(return_pil=True)) > 0:
120
  state = default_conversation.copy()
121
  state.append_message(state.roles[0], text)
122
  state.append_message(state.roles[1], None)
123
  state.skip_next = False
124
- return (state, state.to_gradio_chatbot(), "", None, None) + (disable_btn,) * 5
125
 
126
 
127
  def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
@@ -145,24 +148,6 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
145
  # Construct prompt
146
  prompt = state.get_prompt()
147
 
148
- all_images = state.get_images(return_pil=True)
149
- all_image_hash = [hashlib.md5(image.tobytes()).hexdigest() for image in all_images]
150
- for image, hash in zip(all_images, all_image_hash):
151
- t = datetime.datetime.now()
152
- filename = os.path.join(LOGDIR, "serve_images", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg")
153
- if not os.path.isfile(filename):
154
- os.makedirs(os.path.dirname(filename), exist_ok=True)
155
- image.save(filename)
156
-
157
- all_segs = state.get_segs(return_pil=True)
158
- all_seg_hash = [hashlib.md5(seg.tobytes()).hexdigest() for seg in all_segs]
159
- for seg, hash in zip(all_segs, all_seg_hash):
160
- t = datetime.datetime.now()
161
- filename = os.path.join(LOGDIR, "serve_segs", f"{t.year}-{t.month:02d}-{t.day:02d}", f"{hash}.jpg")
162
- if not os.path.isfile(filename):
163
- os.makedirs(os.path.dirname(filename), exist_ok=True)
164
- seg.save(filename)
165
-
166
  # Make requests
167
  pload = {
168
  "model": model_name,
@@ -171,13 +156,15 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
171
  "top_p": float(top_p),
172
  "max_new_tokens": min(int(max_new_tokens), 1536),
173
  "stop": state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2,
174
- "images": f'List of {len(state.get_images())} images: {all_image_hash}',
175
- "segs": f'List of {len(state.get_segs())} segs: {all_seg_hash}',
 
176
  }
177
  logger.info(f"==== request ====\n{pload}")
178
 
179
  pload['images'] = state.get_images()
180
  pload['segs'] = state.get_segs()
 
181
 
182
  state.messages[-1][-1] = "▌"
183
  yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
@@ -207,24 +194,8 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
207
 
208
  state.messages[-1][-1] = state.messages[-1][-1][:-1]
209
  yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
210
-
211
- finish_tstamp = time.time()
212
  logger.info(f"{output}")
213
 
214
- with open(get_conv_log_filename(), "a") as fout:
215
- data = {
216
- "tstamp": round(finish_tstamp, 4),
217
- "type": "chat",
218
- "model": model_name,
219
- "start": round(start_tstamp, 4),
220
- "finish": round(start_tstamp, 4),
221
- "state": state.dict(),
222
- "images": all_image_hash,
223
- "segs": all_seg_hash,
224
- "ip": request.client.host,
225
- }
226
- fout.write(json.dumps(data) + "\n")
227
-
228
 
229
  title = "<h1 style='margin-bottom: -10px; text-align: center'>VCoder: Versatile Vision Encoders for Multimodal Large Language Models</h1>"
230
  # style='
@@ -284,6 +255,12 @@ def build_demo(embed_mode):
284
  ["Crop", "Resize", "Pad", "Default"],
285
  value="Default",
286
  label="Preprocess for non-square Seg Map", visible=False)
 
 
 
 
 
 
287
 
288
  with gr.Accordion("Parameters", open=False) as parameter_row:
289
  temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",)
@@ -307,13 +284,8 @@ def build_demo(embed_mode):
307
 
308
  cur_dir = os.path.dirname(os.path.abspath(__file__))
309
  gr.Examples(examples=[
310
- [f"{cur_dir}/examples/people.jpg", f"{cur_dir}/examples/people_pan.png", "What objects can be seen in the image?", "0.9", "1.0"],
311
- [f"{cur_dir}/examples/corgi.jpg", f"{cur_dir}/examples/corgi_pan.png", "What objects can be seen in the image?", "0.6", "0.7"],
312
- [f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "Can you count the number of people in the image?", "0.8", "0.9"],
313
- [f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "What is happening in the image?", "0.8", "0.9"],
314
- [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_pan.png", "What objects can be seen in the image?", "0.5", "0.5"],
315
- [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_ins.png", "What objects can be seen in the image?", "0.5", "0.5"],
316
- ], inputs=[imagebox, segbox, textbox, temperature, top_p])
317
 
318
  if not embed_mode:
319
  gr.Markdown(tos_markdown)
@@ -327,16 +299,16 @@ def build_demo(embed_mode):
327
  [state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
328
  flag_btn.click(flag_last_response,
329
  [state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
330
- regenerate_btn.click(regenerate, [state, image_process_mode, seg_process_mode],
331
- [state, chatbot, textbox, imagebox, segbox] + btn_list).then(
332
  http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
333
  [state, chatbot] + btn_list)
334
- clear_btn.click(clear_history, None, [state, chatbot, textbox, imagebox, segbox] + btn_list)
335
 
336
- textbox.submit(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode], [state, chatbot, textbox, imagebox, segbox] + btn_list
337
  ).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
338
  [state, chatbot] + btn_list)
339
- submit_btn.click(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode], [state, chatbot, textbox, imagebox, segbox] + btn_list
340
  ).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
341
  [state, chatbot] + btn_list)
342
 
 
80
  vote_last_response(state, "flag", model_selector, request)
81
  return ("",) + (disable_btn,) * 3
82
 
83
def regenerate(state, image_process_mode, seg_process_mode, depth_process_mode):
    """Discard the last assistant reply and re-queue the previous user turn.

    If the stored user message is a packed media tuple
    (text, image, image_mode, seg, seg_mode, depth, depth_mode), the three
    process-mode slots are refreshed from the current UI selections.

    Returns 6 widget values (state, chatbot, textbox, imagebox, segbox,
    depthbox) followed by 5 button states, matching the click wiring's
    output list.
    """
    # Clear the assistant's last answer so http_bot regenerates it.
    state.messages[-1][-1] = None
    prev_human_msg = state.messages[-2]
    if type(prev_human_msg[1]) in (tuple, list):
        prev_human_msg[1] = (*prev_human_msg[1][:2], image_process_mode,
                             prev_human_msg[1][3], seg_process_mode,
                             prev_human_msg[1][5], depth_process_mode)
    state.skip_next = False
    # FIX: return 6 widget values, not 7 -- the event wiring's outputs are
    # [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list, so the
    # previous extra None made the return arity exceed the output list.
    return (state, state.to_gradio_chatbot(), "", None, None, None) + (disable_btn,) * 5
90
 
91
 
92
def clear_history(request: gr.Request):
    """Reset the conversation to a fresh default state and blank all inputs.

    Returns 6 widget values (state, chatbot, textbox, imagebox, segbox,
    depthbox) followed by 5 button states, matching the click wiring's
    output list.
    """
    state = default_conversation.copy()
    # FIX: return 6 widget values, not 7 -- the clear_btn wiring lists
    # [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list, so the
    # previous extra None made the return arity exceed the output list.
    return (state, state.to_gradio_chatbot(), "", None, None, None) + (disable_btn,) * 5
95
 
96
 
97
def add_text(state, text, image, image_process_mode, seg, seg_process_mode, depth, depth_process_mode, request: gr.Request):
    """Validate the user's input and append it as the next conversation turn.

    Prepends the <image>/<seg>/<depth> placeholder tokens when the matching
    media input is present, truncates over-long text, and packs the turn as a
    (text, image, image_mode, seg, seg_mode, depth, depth_mode) tuple.

    Returns 6 widget values (state, chatbot, textbox, imagebox, segbox,
    depthbox) followed by 5 button states, matching the submit/click
    wiring's output list.
    """
    logger.info(f"add_text. len: {len(text)}")
    if len(text) <= 0 and image is None:
        # Nothing to send: skip the bot round-trip and leave buttons as-is.
        state.skip_next = True
        # FIX: return 6 widget values, not 7 -- the event wiring's outputs are
        # [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list, so
        # the previous extra None made the return arity exceed the output list
        # (same fix applied to the two returns below).
        return (state, state.to_gradio_chatbot(), "", None, None, None) + (no_change_btn,) * 5
    if args.moderate:
        # Optional content-moderation gate on the raw text.
        flagged = violates_moderation(text)
        if flagged:
            state.skip_next = True
            return (state, state.to_gradio_chatbot(), moderation_msg, None, None, None) + (
                no_change_btn,) * 5

    text = text[:1200]  # Hard cut-off
    if image is not None:
        text = text[:864]  # Hard cut-off for images
        if '<image>' not in text:
            text = '<image>\n' + text
    if seg is not None:
        if '<seg>' not in text:
            text = '<seg>\n' + text
    if depth is not None:
        if '<depth>' not in text:
            text = '<depth>\n' + text

    # Pack the turn as (text, image, image_mode, seg, seg_mode, depth, depth_mode).
    text = (text, image, image_process_mode, seg, seg_process_mode, depth, depth_process_mode)
    # Start a fresh conversation once a previous turn already carried images.
    if len(state.get_images(return_pil=True)) > 0:
        state = default_conversation.copy()
    state.append_message(state.roles[0], text)
    state.append_message(state.roles[1], None)
    state.skip_next = False
    return (state, state.to_gradio_chatbot(), "", None, None, None) + (disable_btn,) * 5
128
 
129
 
130
  def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request: gr.Request):
 
148
  # Construct prompt
149
  prompt = state.get_prompt()
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  # Make requests
152
  pload = {
153
  "model": model_name,
 
156
  "top_p": float(top_p),
157
  "max_new_tokens": min(int(max_new_tokens), 1536),
158
  "stop": state.sep if state.sep_style in [SeparatorStyle.SINGLE, SeparatorStyle.MPT] else state.sep2,
159
+ "images": f'List of {len(state.get_images())}',
160
+ "segs": f'List of {len(state.get_segs())}',
161
+ "depths": f'List of {len(state.get_depths())}',
162
  }
163
  logger.info(f"==== request ====\n{pload}")
164
 
165
  pload['images'] = state.get_images()
166
  pload['segs'] = state.get_segs()
167
+ pload['depths'] = state.get_depths()
168
 
169
  state.messages[-1][-1] = "▌"
170
  yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
 
194
 
195
  state.messages[-1][-1] = state.messages[-1][-1][:-1]
196
  yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
 
 
197
  logger.info(f"{output}")
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  title = "<h1 style='margin-bottom: -10px; text-align: center'>VCoder: Versatile Vision Encoders for Multimodal Large Language Models</h1>"
201
  # style='
 
255
  ["Crop", "Resize", "Pad", "Default"],
256
  value="Default",
257
  label="Preprocess for non-square Seg Map", visible=False)
258
+
259
+ depthbox = gr.Image(type="pil", label="Depth Map")
260
+ depth_process_mode = gr.Radio(
261
+ ["Crop", "Resize", "Pad", "Default"],
262
+ value="Default",
263
+ label="Preprocess for non-square Depth Map", visible=False)
264
 
265
  with gr.Accordion("Parameters", open=False) as parameter_row:
266
  temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",)
 
284
 
285
  cur_dir = os.path.dirname(os.path.abspath(__file__))
286
  gr.Examples(examples=[
287
+ [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_pan.png", f"{cur_dir}/examples/suits_depth.jpeg", "Can you describe the depth order of the objects in this image, from closest to farthest?", "0.5", "0.5"],
288
+ ], inputs=[imagebox, segbox, depthbox, textbox, temperature, top_p])
 
 
 
 
 
289
 
290
  if not embed_mode:
291
  gr.Markdown(tos_markdown)
 
299
  [state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
300
  flag_btn.click(flag_last_response,
301
  [state, model_selector], [textbox, upvote_btn, downvote_btn, flag_btn])
302
+ regenerate_btn.click(regenerate, [state, image_process_mode, seg_process_mode, depth_process_mode],
303
+ [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list).then(
304
  http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
305
  [state, chatbot] + btn_list)
306
+ clear_btn.click(clear_history, None, [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list)
307
 
308
+ textbox.submit(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode, depthbox, depth_process_mode], [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list
309
  ).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
310
  [state, chatbot] + btn_list)
311
+ submit_btn.click(add_text, [state, textbox, imagebox, image_process_mode, segbox, seg_process_mode, depthbox, depth_process_mode], [state, chatbot, textbox, imagebox, segbox, depthbox] + btn_list
312
  ).then(http_bot, [state, model_selector, temperature, top_p, max_output_tokens],
313
  [state, chatbot] + btn_list)
314