hsienchen committed on
Commit
b472dbe
1 Parent(s): 5be3d49
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PIL.Image
2
+ import gradio as gr
3
+ import base64
4
+ import time
5
+ import os
6
+ import google.generativeai as genai
7
+
8
+ import pathlib
9
+ from google.colab import userdata
10
+
11
# Gemini model handles: `txt_model` serves text-only prompts, `vis_model`
# serves prompts that include an image (see llm_response).
txt_model = genai.GenerativeModel("gemini-pro")
vis_model = genai.GenerativeModel("gemini-pro-vision")

import os

# The API key is read from the environment; the value is None when the
# GOOGLE_API_KEY variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

genai.configure(api_key=GOOGLE_API_KEY)
19
+
20
+ # Image to Base 64 Converter
21
def image_to_base64(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode, base64-encoded, and the encoded bytes
    are decoded as UTF-8 so the result is a plain ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
25
+
26
+ # Function that takes User Inputs and displays it on ChatUI
27
def query_message(history, txt, img):
    """Append the user's turn to the chat history and return the history.

    Args:
        history: List of ``(user_message, bot_message)`` tuples; it is
            extended in place and the same list object is returned.
        txt: The user's prompt text.
        img: Path of the uploaded image file, or a falsy value when no
            image was provided.

    Returns:
        ``history`` with one new ``(user_message, None)`` entry. When an
        image is given, the user message is the prompt followed by a
        Markdown image whose source is a base64 ``data:`` URL of the file.
    """
    if not img:
        history += [(txt, None)]
        return history

    # Function-scope import so this block does not depend on a new
    # file-level import.
    import mimetypes

    # Derive the MIME type from the file name instead of always claiming
    # JPEG; fall back to the previous default when it cannot be determined
    # or is not an image type.
    mime_type, _ = mimetypes.guess_type(img)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    # Named `encoded_image` (not `base64`) to avoid shadowing the module.
    encoded_image = image_to_base64(img)
    data_url = f"data:{mime_type};base64,{encoded_image}"
    history += [(f"{txt} ![]({data_url})", None)]
    return history
35
+
36
+ # Function that takes User Inputs, generates Response and displays on Chat UI
37
def llm_response(history, text, img):
    """Generate the model's reply and append it to the chat history.

    Args:
        history: List of ``(user_message, bot_message)`` tuples; it is
            extended in place and the same list object is returned.
        text: The prompt text sent to the model.
        img: Path of the uploaded image file, or a falsy value when no
            image was provided.

    Returns:
        ``history`` with one new ``(None, reply_text)`` entry. Text-only
        prompts go to ``txt_model``; prompts with an image go to
        ``vis_model`` together with the opened image.
    """
    if not img:
        response = txt_model.generate_content(text)
    else:
        # Open the image in a context manager so the underlying file handle
        # is released once the request has been made.
        with PIL.Image.open(img) as image:
            response = vis_model.generate_content([text, image])

    # NOTE(review): `response.text` may raise if the API returns no usable
    # text (e.g. a blocked prompt) — confirm against the SDK and decide
    # whether to surface a friendly message instead.
    history += [(None, response.text)]
    return history
48
+
49
+ # Interface Code- Selector method
50
+
51
def sentence_builder(animal, place):
    """Build a counting question about *animal* (pluralised with "s") from *place*."""
    question = f"how many {animal}s from the {place} are shown in the picture?"
    return question
53
+
54
+ # gradio block
55
+
56
# Chat UI: an image input and a chatbot pane, a dropdown of canned prompts,
# and a Submit button that first echoes the user's turn and then asks the
# model for a reply.
# NOTE(review): the original indentation was not recoverable; the nesting
# below (image + chatbot inside the Row, dropdown + button under it) is a
# reconstruction — confirm against the intended layout.
with gr.Blocks() as app:
    with gr.Row():
        image_box = gr.Image(type="filepath")

        chatbot = gr.Chatbot(scale=2, height=750)

    text_box = gr.Dropdown(
        choices=[
            "what is in the image",
            "provide alternative title for the image",
            "how many birds can be seen in the picture?",
        ],
        label="Animal",
        info="Will add more animals later!",
    )

    btn = gr.Button("Submit")

    # Step 1 shows the user's message; step 2 runs after it and appends the
    # model's answer. Both read the same inputs and update the chatbot.
    clicked = btn.click(
        fn=query_message,
        inputs=[chatbot, text_box, image_box],
        outputs=chatbot,
    ).then(
        fn=llm_response,
        inputs=[chatbot, text_box, image_box],
        outputs=chatbot,
    )

app.queue()
app.launch()