Spaces:

hsienchen
/

git-2-HF

Sleeping

App Files Files Community

hsien chen committed on Jan 19

Commit

ce830c4

•

1 Parent(s): 98f3187

HF

Browse files

Files changed (2) hide show

app.py +122 -10
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -1,16 +1,128 @@
-from transformers import pipeline
 import gradio as gr
-model = pipeline(
-    "summarization",
-)
-def predict(prompt):
-    summary = model(prompt)[0]["summary_text"]
-    return summary
-# create an interface for the model
-with gr.Interface(predict, "textbox", "text") as interface:
-    interface.launch()

+import PIL.Image
 import gradio as gr
+import base64
+import time
+import os
+import google.generativeai as genai
+import pathlib
+txt_model = genai.GenerativeModel('gemini-pro')  # model used for text-only prompts
+vis_model = genai.GenerativeModel('gemini-pro-vision')  # model used when an image accompanies the prompt
+import os
+GOOGLE_API_KEY=os.getenv('GOOGLE_API_KEY')  # None when the environment variable is unset
+genai.configure(api_key=GOOGLE_API_KEY)  # NOTE(review): runs after the models are built; presumably fine because the client is created lazily -- confirm
+# Image to Base 64 Converter
+def image_to_base64(image_path):
+    with open(image_path, 'rb') as img:
+        encoded_string = base64.b64encode(img.read())
+    return encoded_string.decode('utf-8')
+# Function that takes User Inputs and displays it on ChatUI
+def query_message(history,txt,img):
+    if not img:
+        history += [(txt,None)]
+        return history
+    base64 = image_to_base64(img)
+    data_url = f"data:image/jpeg;base64,{base64}"
+    history += [(f"{txt} ![]({data_url})", None)]
+    return history
+# Function that takes User Inputs, generates Response and displays on Chat UI
+def llm_response(history,text,img):
+    if not img:
+        response = txt_model.generate_content(text)
+        history += [(None,response.text)]
+        return history
+    else:
+        img = PIL.Image.open(img)
+        response = vis_model.generate_content([text,img])
+        history += [(None,response.text)]
+        return history
+# Function that takes User Inputs and displays it on ChatUI
+text_box_01 = "what is in the image"
+def output_query_message(img):
+    if not img:
+        return text_box_01
+    base64 = image_to_base64(img)
+    data_url = f"data:image/jpeg;base64,{base64}"
+    outputText = [(f"{text_box_01} ![]({data_url})", None)]
+    return outputText
+# Function that takes User Inputs, generates Response and displays on Chat UI
+def output_llm_response(img):
+    if not img:
+        response = txt_model.generate_content(text_box_01)
+        return response.text
+    else:
+        img = PIL.Image.open(img)
+        response = vis_model.generate_content([text_box_01,img])
+        return response.text
+# Interface Code- Selector method
+def sentence_builder(animal, place):
+    return f"""how many {animal}s from the {place} are shown in the picture?"""
+# gradio block: tab 1 -- upload an image and ask the fixed question text_box_01 about it
+with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
+    with gr.Column():
+        outputbox = gr.Textbox(label="line clearance...")  # receives the echoed prompt, then the model reply
+        image_box = gr.Image(type="filepath")  # handlers receive a file path, not pixel data
+    btn = gr.Button("Check This")
+    clicked = btn.click(output_query_message,  # step 1: echo the prompt (and image, if any)
+                        [image_box],
+                        outputbox
+                        ).then(output_llm_response,  # step 2: overwrite with the model's answer
+                                [image_box],
+                                outputbox
+                                )
+    gr.Markdown("""
+    ## SOP-302: Line Clearance ##
+    <h5 align="center"><i>"XXXX here here."</i></h5>
+    Multimodal-CoT incorporates vision features in a decoupled training framework. The framework consists of two training stages: (i) rationale generation and (ii) answer inference. Both stages share the same model architecture but differ in the input and output.
+    """)
+with gr.Blocks(theme='snehilsanyal/scikit-learn') as app2:
+    gr.Markdown("## MM 2BB ##")
+    with gr.Row():
+        image_box = gr.Image(type="filepath")
+        chatbot = gr.Chatbot(
+            scale = 2,
+            height=750
+        )
+    text_box = gr.Dropdown(
+                ["what is in the image",
+                 "provide alternative title for the image",
+                 "how many birds can be seen in the picture?"],
+                 label="Select--",
+                 info="Will add more animals later!"
+            )
+    btn = gr.Button("Submit")
+    clicked = btn.click(query_message,
+                        [chatbot,text_box,image_box],
+                        chatbot
+                        ).then(llm_response,
+                                [chatbot,text_box],
+                                chatbot
+                                )
+with gr.Blocks(theme='snehilsanyal/scikit-learn') as demo:  # top-level app that hosts both tabs
+    gr.Markdown("## SOP Camera ##")
+    gr.TabbedInterface([app1, app2], ["Check #1", "Check #2"])  # app1 -> "Check #1", app2 -> "Check #2"
+demo.queue()  # enable request queuing before launch
+demo.launch()  # starts the server at import time; there is no __main__ guard

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 gradio
 transformers
-tensorflow

 gradio
 transformers
+tensorflow
+google-generativeai
+Pillow