chendl committed on
Commit
e82e643
1 Parent(s): 098738d

update chat

app.py CHANGED
@@ -248,12 +248,17 @@ def gradio_ask(user_message, chatbot, chat_state):
 
 
 def gradio_answer(chatbot, chat_state, img_list, num_beams, temperature):
-    llm_message = \
-        chat.answer(conv=chat_state, img_list=img_list, max_new_tokens=300, num_beams=1, temperature=temperature,
-                    max_length=2000)[0]
+    llm_message, image = \
+        chat.answer(conv=chat_state, img_list=img_list, max_new_tokens=300, num_beams=1, temperature=temperature,
+                    max_length=2000)
 
     chatbot[-1][1] = llm_message
-    return chatbot, chat_state, img_list
+    if image is None:
+        return chatbot, chat_state, img_list
+    else:
+        path = build_image(image)
+        chatbot = chatbot + [[(path,), None]]
+        return chatbot, chat_state, img_list
 
 
 with gr.Blocks() as demo:
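
Note: build_image is called by the new gradio_answer but is not part of this diff. Below is a minimal sketch of what such a helper could look like, assuming all it needs to do is persist the returned PIL image and hand Gradio a file path it can render; the name, signature, and temp-file strategy are illustrative, not the repository's actual implementation.

import os
import tempfile


def build_image(image):
    # Hypothetical helper: save the PIL image to a unique temporary PNG and
    # return its path. Gradio's Chatbot component renders a [(path,), None]
    # message as an inline image, so a file path is all gradio_answer needs.
    fd, path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    image.save(path)
    return path
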
multimodal/open_flamingo/chat/conversation.py CHANGED
@@ -260,6 +260,12 @@ def preprocess_conv(data):
         conversation += (BEGIN_SIGNAL + from_str + ": " + d["value"] + END_SIGNAL)
     return conversation
 
+def preprocess_image(sample, image_processor):
+    image = image_processor(sample)
+    if isinstance(image, transformers.image_processing_utils.BatchFeature):
+        image = torch.tensor(image["pixel_values"][0])
+    return image
+
 class Chat:
     def __init__(self, model, vis_processor, tokenizer, vis_embed_size):
         self.model = model
@@ -322,6 +328,7 @@ class Chat:
         # "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/cdl/tmp_img/chat_vis/chat19.png"
         # image_path = input("Please enter the image path: ")
         image = img_list[0].convert("RGB")
+        image_ori = image
         image = image.resize((size, size))
         print(f"image size: {image.size}")
         batch_images = preprocess_image(image, self.vis_processor).unsqueeze(0).unsqueeze(1).unsqueeze(0)
@@ -370,14 +377,31 @@ class Chat:
             image_start_index_list=image_start_index_list,
             image_nums=image_nums,
         )
-        output_token = outputs[0, input_ids.shape[1]:]
-        output_text = tokenizer.decode(output_token, skip_special_tokens=True).strip()
-        conv[-1]["value"] = output_text
-        # conv.messages[-1][1] = output_text
-        print(
-            f"### Assistant: {tokenizer.decode(outputs[0, input_ids.shape[1]:], skip_special_tokens=True).strip()}")
+        boxes = outputs["boxes"]
+        scores = outputs["scores"]
+        if len(scores) > 0:
+            box = boxes[scores.argmax()] / 224
+            print(f"{box}")
+        out_image = None
+
+        if len(boxes) > 0:
+            open_cv_image = np.array(image_ori)
+            # Convert RGB to BGR
+            open_cv_image = open_cv_image[:, :, ::-1].copy()
+            box = box * [width, height, width, height]
+            # for box in boxes:
+            open_cv_image = cv2.rectangle(open_cv_image, box[:2].astype(int), box[2:].astype(int), (255, 0, 0), 2)
+            out_image = Image.fromarray(cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB))
+
 
-        return output_text, output_token.cpu().numpy()
+        # output_token = outputs[0, input_ids.shape[1]:]
+        # output_text = tokenizer.decode(output_token, skip_special_tokens=True).strip()
+        # conv[-1]["value"] = output_text
+        # # conv.messages[-1][1] = output_text
+        # print(
+        #     f"### Assistant: {tokenizer.decode(outputs[0, input_ids.shape[1]:], skip_special_tokens=True).strip()}")
+        output_text = "here"
+        return output_text, out_image
 
     def upload_img(self, image, conv, img_list):
         img_list.append(image)
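
Taken together, Chat.answer now returns a (text, image-or-None) pair: the detection outputs ("boxes", "scores") replace token decoding, the highest-scoring box is rescaled from the 224x224 model input back to the original image, and drawn with OpenCV. Below is a self-contained sketch of that drawing step, assuming boxes arrive in 224x224 pixel coordinates (hence the division by 224) and that width and height come from the original image, as the hunk above implies; function and argument names are illustrative.

import cv2
import numpy as np
from PIL import Image


def draw_best_box(image_ori, boxes, scores):
    # Sketch of the new drawing path in Chat.answer: return the original image
    # with the highest-scoring box drawn, or None when no box was produced.
    if len(scores) == 0 or len(boxes) == 0:
        return None
    width, height = image_ori.size
    # Rescale the box from 224x224 model space to the original resolution.
    box = boxes[scores.argmax()] / 224 * np.array([width, height, width, height])
    # PIL arrays are RGB; OpenCV draws on BGR, so convert, draw, convert back.
    open_cv_image = np.array(image_ori)[:, :, ::-1].copy()
    pt1 = tuple(int(v) for v in box[:2])
    pt2 = tuple(int(v) for v in box[2:])
    open_cv_image = cv2.rectangle(open_cv_image, pt1, pt2, (255, 0, 0), 2)
    return Image.fromarray(cv2.cvtColor(open_cv_image, cv2.COLOR_BGR2RGB))

With that pair returned, the updated gradio_answer in app.py can append the boxed image to the chat history via build_image whenever the image is not None.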