File size: 2,613 Bytes
930f89e f22cb33 8c29b70 f22cb33 ba9c0ad f22cb33 60361bd f22cb33 930f89e f22cb33 930f89e f22cb33 930f89e f22cb33 930f89e f22cb33 930f89e f22cb33 930f89e 60361bd 369c30a f22cb33 98556ba f22cb33 930f89e 98556ba 60361bd 98556ba 60361bd 98556ba 60361bd f22cb33 ba9c0ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# For ideas on enhancing this app, see the Gradio multimodal chatbot demo:
# https://github.com/gradio-app/gradio/blob/main/demo/chatbot_multimodal/run.ipynb
import PIL.Image
import gradio as gr
import base64
import time
import os
import google.generativeai as genai
import pathlib
# Gemini model handles: 'gemini-pro' is used for prompt-only requests and
# 'gemini-pro-vision' for prompt + image requests (see app1_response).
txt_model = genai.GenerativeModel('gemini-pro')
vis_model = genai.GenerativeModel('gemini-pro-vision')
# Instruction sent to the model together with the letter image. It asks for a
# "sender and subject" header followed by exactly three action items of at
# most 20 words each, and includes a worked example of that format.
txt_prompt_1 = """The image contains the contents of a letter. I'd like to follow the request mentioned in the letter. Please provide 3 actionable items to assist me. When responding, use the following format:
# Sender and Subject #
1- Action 1 (no more than 20 words)
2- Action 2 (no more than 20 words)
3- Action 3 (no more than 20 words)
For example:
# From Richard regarding 'Shipping to Customer ABC' #
1- Pack Product A
2- Ship before 3:00 PM today
3- Notify Richard after shipment
"""
# Label that app1_query places in front of the embedded image markdown.
txt_display_1 = 'content of the letter: '
# NOTE(review): `os` is already imported at the top of the file; this second
# import is redundant but harmless.
import os
# The API key is read from the GOOGLE_API_KEY environment variable;
# os.getenv returns None when the variable is not set.
GOOGLE_API_KEY=os.getenv('GOOGLE_API_KEY')
# NOTE(review): configure() runs after the two models above were constructed.
# Presumably this works because the client is only needed at request time --
# confirm against the google.generativeai documentation.
genai.configure(api_key=GOOGLE_API_KEY)
# Placeholder text; not referenced by any other code visible in this file.
sms_text ="..."
# Image to Base 64 Converter
def image_to_base64(image_path):
    """Read the file at ``image_path`` and return its bytes as base64 text.

    The file is opened in binary mode, and the base64 output is decoded to a
    ``str`` so it can be embedded directly in other text (e.g. a data URL).
    """
    with open(image_path, 'rb') as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
def app1_query(img):
    """Build the display payload for an uploaded letter image.

    When ``img`` is falsy the prompt text ``txt_prompt_1`` is returned as-is.
    Otherwise the file at ``img`` is base64-encoded and embedded as a JPEG data
    URL in a markdown image tag, prefixed with ``txt_display_1``; the result is
    a one-element list holding a ``(markdown, None)`` tuple.
    """
    if img:
        encoded_image = image_to_base64(img)
        data_url = f"data:image/jpeg;base64,{encoded_image}"
        return [(f"{txt_display_1} ![]({data_url})", None)]
    return txt_prompt_1
# Sends the prompt (plus the uploaded image, if any) to Gemini and returns the reply for the output textbox
def app1_response(img):
    """Ask Gemini for the action plan and return the reply as plain text.

    Args:
        img: Path to the uploaded letter image, or a falsy value when no
            image was provided.

    Returns:
        The ``text`` attribute of the model response. Without an image the
        prompt alone is sent to ``txt_model``; with an image the prompt and
        the opened image are sent to ``vis_model``.
    """
    if not img:
        # Return the reply text rather than the response object so both
        # branches hand the same kind of value to the output textbox.
        return txt_model.generate_content(txt_prompt_1).text
    # The context manager closes the image file once the request has been made.
    with PIL.Image.open(img) as letter_image:
        response = vis_model.generate_content([txt_prompt_1, letter_image])
    return response.text
def app2_response(text):
    """Return ``text`` with the literal suffix ``"Simon"`` appended."""
    suffix = "Simon"
    return text + suffix
# gradio block
# UI definition: an image upload, two buttons with their output boxes, and
# usage notes, all created inside a single column of the Blocks app.
with gr.Blocks() as app1:
    with gr.Column():
        # type="filepath" makes the click handler receive a path string.
        image_box = gr.Image(type="filepath")
        btn1 = gr.Button("Make a Plan")
        out1 = gr.Textbox(label="here are the plans...")
        btn2 = gr.Button("Send to My Mobile")
        out2 = gr.Textbox(label="from SMS gateway...")
        # First button: image -> app1_response -> out1.
        # Second button: contents of out1 -> app2_response -> out2.
        btn1.click(fn=app1_response, inputs=[image_box], outputs=out1)
        btn2.click(fn=app2_response, inputs=out1, outputs=out2)
        # NOTE(review): the original nesting could not be recovered; the notes
        # are kept inside the column in their original position -- confirm.
        # The markdown text stays at column 0 so the string is unchanged.
        gr.Markdown("""
# Make a Plan #
- screen capture (Win + shift + S)
- click **Make a Plan** to upload
- await LLM Bot (Gemini, in this case) response
- receive THREE actionable items
[demo](https://youtu.be/lJ4jIAEVRNY)
""")

# Enable request queuing, then start the server.
app1.queue()
app1.launch()