chendl committed on
Commit 86468ab
1 Parent(s): b96f40b

update chat

app.py CHANGED
@@ -248,17 +248,20 @@ def gradio_ask(user_message, chatbot, chat_state, radio):


 def gradio_answer(chatbot, chat_state, img_list, radio, text, num_beams, temperature):
+    image = None
     llm_message, image = \
         chat.answer(conv=chat_state, img_list=img_list, max_new_tokens=300, num_beams=1, temperature=temperature,
                     max_length=2000, radio=radio, text_input=text)

     chatbot[-1][1] = llm_message
+    if chat_state[-1]["from"] == "gpt":
+        chat_state[-1]["value"] = llm_message
     if image == None:
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
     else:
         path = build_image(image)
         chatbot = chatbot + [[None, (path,)]]
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list

 task_template = {
     "Cap": "Summarize the content of the photo <image>.",
@@ -312,13 +315,13 @@ with gr.Blocks() as demo:
     # text_input = gr.Textbox(label='<question>', show_label=True, placeholder="Please upload your image first, then input...", lines=3,
     #                         value=None, visible=False, interactive=False)

-    text_input = gr.Textbox(label='User', placeholder='Please upload your image first then input...', interactive=False)
+    text_input = gr.Textbox(label='User', placeholder='Please upload your image first, then input...', interactive=False)

     upload_button.click(upload_img, [image, text_input, chat_state, chatbot],
                         [image, text_input, upload_button, chat_state, img_list, chatbot])

     text_input.submit(gradio_ask, [text_input, chatbot, chat_state, radio], [text_input, chatbot, chat_state]).then(
-        gradio_answer, [chatbot, chat_state, img_list, radio, text_input, num_beams, temperature], [chatbot, chat_state, img_list]
+        gradio_answer, [chatbot, chat_state, img_list, radio, text_input, num_beams, temperature], [text_input, chatbot, chat_state, img_list]
     )
     clear.click(gradio_reset, [chat_state, img_list], [chatbot, image, text_input, upload_button, chat_state, img_list],
                 queue=False)
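A note on the rewired `.then(...)` call: Gradio maps a handler's return values to its output components positionally, so adding `text_input` at the head of the outputs list is what lets the new `return "", chatbot, chat_state, img_list` clear the textbox once the answer arrives. A reduced, self-contained sketch of the pattern, assuming the Gradio 3.x list-of-pairs chat format (the handler bodies are hypothetical stand-ins for gradio_ask/gradio_answer):

import gradio as gr

def ask(user_message, history):
    # Echo the question into the chat history; leave the textbox as-is
    # until the answer handler has run.
    return user_message, history + [[user_message, None]]

def answer(history):
    history[-1][1] = "echo: " + history[-1][0]  # stand-in for chat.answer(...)
    # Returning "" first clears the textbox, because the .then() call
    # below lists text_input first in its outputs.
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    text_input = gr.Textbox(label="User")
    text_input.submit(ask, [text_input, chatbot], [text_input, chatbot]).then(
        answer, [chatbot], [text_input, chatbot]
    )

demo.launch()

Clearing the box in the second step rather than in `submit` keeps the question visible while the model is still generating.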
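The new `chat_state` write-back in `gradio_answer` assumes the conversation state is a list of dicts with "from" and "value" keys, which matches how `conv.append(...)` builds it in conversation.py below. A minimal sketch of that round trip, with made-up sample turns:

# Conversation state shaped like the conv list built in conversation.py;
# the turn contents here are invented for illustration.
chat_state = [
    {"from": "human", "value": "Summarize the content of the photo <image>."},
    {"from": "gpt", "value": ""},  # empty slot appended before generation
]

llm_message = "A dog playing in a park."  # hypothetical model reply

# The commit copies the generated reply into the last "gpt" turn so the
# stored history stays in sync with what the chatbot widget displays.
if chat_state[-1]["from"] == "gpt":
    chat_state[-1]["value"] = llm_message

assert chat_state[-1]["value"] == llm_message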
multimodal/open_flamingo/chat/conversation.py CHANGED
@@ -287,7 +287,7 @@ class Chat:
         elif radio in ["VQA"]:
             conv.append({
                 "from": "human",
-                "value": f"Answer the question using a single word or phrase.{text}",
+                "value": f"Answer the question using a single word or phrase. {text}",
             })
         elif radio in ["REC"]:
             conv.append({
@@ -368,7 +368,7 @@ class Chat:
             conv.append(
                 {
                     "from": "gpt",
-                    "value": object_token + text_input + end_token + visual_token
+                    "value": object_token + text_input + end_token + visual_token,
                 }
             )
         else:
@@ -427,17 +427,6 @@ class Chat:
             added_bbox_list=None,
             add_box=False,
         )
-        # with torch.no_grad():
-        #     outputs = self.model.generate(
-        #         batch_images,
-        #         input_ids,
-        #         attention_mask=attention_mask,
-        #         max_new_tokens=100,
-        #         # min_new_tokens=8,
-        #         num_beams=1,
-        #         image_start_index_list=image_start_index_list,
-        #         image_nums=image_nums,
-        #     )
         boxes = outputs["boxes"]
         scores = outputs["scores"]
         if len(scores) > 0:
@@ -463,6 +452,8 @@ class Chat:
         # print(
         #     f"### Assistant: {tokenizer.decode(outputs[0, input_ids.shape[1]:], skip_special_tokens=True).strip()}")
         output_text = self.tokenizer.decode(text_outputs[0])
+        output_text = re.findall(r'Assistant:(.+)', output_text)[-1]
+
         return output_text, out_image

     def upload_img(self, image, conv, img_list):
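The added `re.findall(r'Assistant:(.+)', output_text)[-1]` keeps only the text after the last `Assistant:` marker in the decoded output. It relies on `re` being imported in conversation.py and on the marker always being present: with no match, `findall` returns an empty list and `[-1]` raises `IndexError`. A small sketch with a made-up decoded string, plus a guarded variant:

import re

# Hypothetical decoded output; real strings come from self.tokenizer.decode(...).
output_text = "User: Summarize the photo. Assistant: A dog playing in a park.<|endofchunk|>"

# The commit's extraction: text after the last "Assistant:" marker.
reply = re.findall(r'Assistant:(.+)', output_text)[-1]

# Guarded variant: fall back to the raw text when the marker is absent,
# instead of raising IndexError on an empty findall() result.
matches = re.findall(r'Assistant:(.+)', output_text)
reply = matches[-1].strip() if matches else output_text.strip()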