update chat
- app.py +7 -4
- multimodal/open_flamingo/chat/conversation.py +4 -13
app.py
CHANGED
```diff
@@ -248,17 +248,20 @@ def gradio_ask(user_message, chatbot, chat_state, radio):
 
 
 def gradio_answer(chatbot, chat_state, img_list, radio, text, num_beams, temperature):
+    image = None
     llm_message, image = \
         chat.answer(conv=chat_state, img_list=img_list, max_new_tokens=300, num_beams=1, temperature=temperature,
                     max_length=2000, radio=radio, text_input=text)
 
     chatbot[-1][1] = llm_message
+    if chat_state[-1]["from"] == "gpt":
+        chat_state[-1]["value"] = llm_message
     if image == None:
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
     else:
         path = build_image(image)
         chatbot = chatbot + [[None, (path,)]]
-        return chatbot, chat_state, img_list
+        return "", chatbot, chat_state, img_list
 
 task_template = {
     "Cap": "Summarize the content of the photo <image>.",
@@ -312,13 +315,13 @@ with gr.Blocks() as demo:
     # text_input = gr.Textbox(label='<question>', show_label=True, placeholder="Please upload your image first, then input...", lines=3,
     #                         value=None, visible=False, interactive=False)
 
-    text_input = gr.Textbox(label='User', placeholder='Please upload your image first
+    text_input = gr.Textbox(label='User', placeholder='Please upload your image first, then input...', interactive=False)
 
     upload_button.click(upload_img, [image, text_input, chat_state, chatbot],
                         [image, text_input, upload_button, chat_state, img_list, chatbot])
 
     text_input.submit(gradio_ask, [text_input, chatbot, chat_state, radio], [text_input, chatbot, chat_state]).then(
-        gradio_answer, [chatbot, chat_state, img_list, radio, text_input, num_beams, temperature], [chatbot, chat_state, img_list]
+        gradio_answer, [chatbot, chat_state, img_list, radio, text_input, num_beams, temperature], [text_input, chatbot, chat_state, img_list]
     )
     clear.click(gradio_reset, [chat_state, img_list], [chatbot, image, text_input, upload_button, chat_state, img_list],
                 queue=False)
```
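The app.py change reworks `gradio_answer`'s outputs: it now returns `""` as a new first value, and `text_input` is added to the outputs of the `.then()` call, so the textbox clears once the model has replied; the added `chat_state[-1]["value"] = llm_message` also writes the reply back into the conversation state. Below is a minimal sketch of that wiring, with toy `ask`/`answer` handlers standing in for the Space's `gradio_ask`/`gradio_answer` and the real `chat.answer` model call (list-of-pairs Chatbot format assumed, as the app already uses):

```python
import gradio as gr

def ask(user_message, history):
    # Stand-in for gradio_ask: queue the user's turn in the chat history.
    history = history + [[user_message, None]]
    return user_message, history

def answer(history):
    # Stand-in for gradio_answer: fill in the bot reply, then return ""
    # first so the textbox listed in the outputs gets cleared.
    history[-1][1] = f"Echo: {history[-1][0]}"
    return "", history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    text_input = gr.Textbox(label="User")
    # Same pattern as the commit: text_input is now among .then()'s outputs,
    # so answer's first return value ("") empties the box after each turn.
    text_input.submit(ask, [text_input, chatbot], [text_input, chatbot]).then(
        answer, [chatbot], [text_input, chatbot]
    )

if __name__ == "__main__":
    demo.launch()
```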
multimodal/open_flamingo/chat/conversation.py
CHANGED
```diff
@@ -287,7 +287,7 @@ class Chat:
         elif radio in ["VQA"]:
             conv.append({
                 "from": "human",
-                "value": f"Answer the question using a single word or phrase.{text}",
+                "value": f"Answer the question using a single word or phrase. {text}",
             })
         elif radio in ["REC"]:
             conv.append({
@@ -368,7 +368,7 @@ class Chat:
             conv.append(
                 {
                     "from": "gpt",
-                    "value": object_token + text_input + end_token + visual_token
+                    "value": object_token + text_input + end_token + visual_token,
                 }
             )
         else:
@@ -427,17 +427,6 @@ class Chat:
             added_bbox_list=None,
             add_box=False,
         )
-        # with torch.no_grad():
-        #     outputs = self.model.generate(
-        #         batch_images,
-        #         input_ids,
-        #         attention_mask=attention_mask,
-        #         max_new_tokens=100,
-        #         # min_new_tokens=8,
-        #         num_beams=1,
-        #         image_start_index_list=image_start_index_list,
-        #         image_nums=image_nums,
-        #     )
         boxes = outputs["boxes"]
         scores = outputs["scores"]
         if len(scores) > 0:
@@ -463,6 +452,8 @@ class Chat:
         # print(
         #     f"### Assistant: {tokenizer.decode(outputs[0, input_ids.shape[1]:], skip_special_tokens=True).strip()}")
         output_text = self.tokenizer.decode(text_outputs[0])
+        output_text = re.findall(r'Assistant:(.+)', output_text)[-1]
+
         return output_text, out_image
 
     def upload_img(self, image, conv, img_list):
```
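The conversation.py change strips the decoded transcript down to the model's last reply. Note that the new line assumes `re` is imported at the top of the module, and that `[-1]` raises an IndexError if no `Assistant:` marker appears in the decode. A small standalone sketch of the extraction (the transcript string here is made up for illustration, and the fallback guard is an addition the commit itself does not include):

```python
import re

# The decoded output contains the full prompt transcript; keep only the
# text after the last "Assistant:" marker, as the commit's findall(...)[-1] does.
raw = "Human: Summarize the content of the photo <image>. Assistant: a cat napping on a sofa"
matches = re.findall(r'Assistant:(.+)', raw)
reply = matches[-1].strip() if matches else raw  # guard the commit omits
print(reply)  # a cat napping on a sofa
```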