todo / app.py
hsienchen's picture
Update app.py
8e025f4 verified
raw
history blame
4.9 kB
# refer to repo https://github.com/gradio-app/gradio/blob/main/demo/chatbot_multimodal/run.ipynb for enhancement
import PIL.Image
import gradio as gr
import base64
import time
import os
import google.generativeai as genai
import requests
import pathlib
# --- Gemini setup ---------------------------------------------------------
# The API key is read from the environment and handed to the SDK before the
# model handles are created, so the configuration reads top-down.
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

# Text-only model (used when no image is supplied) and vision model (used
# when a screenshot is supplied).
txt_model = genai.GenerativeModel('gemini-pro')
vis_model = genai.GenerativeModel('gemini-pro-vision')

# --- Prompts --------------------------------------------------------------
# Prompt for the "To-Dos" tab: turn a letter screenshot into three actions.
txt_prompt_1 = """The image contains the contents of a letter. I'd like to follow the request mentioned in the letter. Please provide 3 actionable items to assist me. When responding, use the following format:
# Sender and Subject #
1- Action 1 (no more than 20 words)
2- Action 2 (no more than 20 words)
3- Action 3 (no more than 20 words)
For example:
# From Richard regarding 'Shipping to Customer ABC' #
1- Pack Product A
2- Ship before 3:00 PM today
3- Notify Richard after shipment
"""
# Display prefix; not referenced by the code visible in this file.
txt_display_1 = 'content of the letter: '

# Prompt for the "SOS" tab: list inventory items whose quantity is below ROQ.
txt_prompt_2 = """The image contains an inventory stock level table with Item Id, Quantity, ROQ, Item Images, and Contact Phone. For reference, ROQ stands for Replenishment Order Quantity.
It is for use of reorder, that is, when quantity level is below ROQ, a reorder process will need to take place. Please provide a list of reorder items for all below ROQ. When responding please follow the below formats:
** ROQ ({today's date})**
Item ID, Shortage (Quantity - ROQ), Contact Phone
(for all below ROQ items)
For example:
** ROQ (02/21/2024) **
#11608, 70 (30-100), 1-858-7331029
#61785, 5 (5-10), 1-858-1233
"""
# Display prefix; not referenced by the code visible in this file.
txt_display_2 = '--- '

# --- SMS gateway settings -------------------------------------------------
# Endpoint URL and authorization token for the SMS gateway, both read from
# the environment (None when unset).
SMS_URL = os.getenv('SMS_URL')
SMS_TOK = os.getenv('SMS_TOK')
# Placeholder text; not referenced by the code visible in this file.
sms_text = "..."
# Image to Base 64 Converter
def image_to_base64(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode, base64-encoded, and the resulting bytes
    are decoded as UTF-8 text.
    """
    with open(image_path, mode='rb') as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
# Function that takes User Inputs, generates Response and displays on Chat UI
def app1_response(img):
    """Ask Gemini for three action items, optionally from a letter screenshot.

    Args:
        img: Path to the uploaded image file, or a falsy value (e.g. ``None``)
            when nothing was uploaded.

    Returns:
        The model's reply as plain text, in both the text-only and the
        image branch.
    """
    if not img:
        # No screenshot: send the prompt alone to the text model. Return the
        # reply's text (not the response object) so the output textbox gets
        # a string, exactly like the image branch below.
        return txt_model.generate_content(txt_prompt_1).text

    # Open the image in a context manager so the file handle is released
    # once the request has been made.
    with PIL.Image.open(img) as letter_image:
        response = vis_model.generate_content([txt_prompt_1, letter_image])
    return response.text
def app2_response(img):
    """Ask Gemini for the below-ROQ reorder list, optionally from a screenshot.

    Args:
        img: Path to the uploaded inventory screenshot, or a falsy value
            (e.g. ``None``) when nothing was uploaded.

    Returns:
        The model's reply as plain text, in both the text-only and the
        image branch.
    """
    if not img:
        # No screenshot: send the prompt alone to the text model. Return the
        # reply's text (not the response object) so the output textbox gets
        # a string, exactly like the image branch below.
        return txt_model.generate_content(txt_prompt_2).text

    # Open the image in a context manager so the file handle is released
    # once the request has been made.
    with PIL.Image.open(img) as inventory_image:
        response = vis_model.generate_content([txt_prompt_2, inventory_image])
    return response.text
# SMS service ends in March 2024, to restore service @Sinch Simple Text
def send_SMS(resp_text):
    """Send *resp_text* as an SMS through the configured HTTP gateway.

    Posts a JSON payload to ``SMS_URL`` with ``SMS_TOK`` as the
    ``Authorization`` header. The sender and recipient numbers are fixed.

    Args:
        resp_text: Message body to send.

    Returns:
        The gateway's raw response text, or a short explanatory message when
        there is nothing to send or the HTTP request fails.
    """
    if not resp_text:
        # Avoid posting an empty message to the gateway.
        return "Nothing to send: generate a response first."

    request_timeout_s = 10  # never let a stalled gateway hang the UI worker
    headers = {
        "Authorization": SMS_TOK,
        "Content-Type": "application/json",
    }
    payload = {
        "from": "12085686834",
        "to": ["18587331029"],
        "body": resp_text,
    }
    try:
        response = requests.post(
            SMS_URL, json=payload, headers=headers, timeout=request_timeout_s
        )
    except requests.RequestException as exc:
        # Covers connection errors, timeouts and an unset/invalid SMS_URL;
        # report the failure in the output box instead of raising.
        return f"SMS request failed: {exc}"
    return response.text
# gradio block
# "To-Dos" tab: upload a letter/e-mail screenshot, get three action items
# from app1_response, and optionally forward them via send_SMS.
with gr.Blocks() as app1:
    with gr.Column():
        gr.Markdown("## 👻 Workflow Genie ##")
        gr.Markdown("```for email ✉ and/or assignment descriptions ⌦, paste screenshot here...```")
        # type="filepath" hands the handler a path string rather than pixel data.
        image_box = gr.Image(label="email screen", type="filepath")
        btn1 = gr.Button("generate To-Dos ☑")
        out1 = gr.Textbox(label="here are the actionables...")
        btn2 = gr.Button("Send to Mobile ✂")
        out2 = gr.Textbox(label="response from SMS gateway...")
        # First button fills out1 from the image; second sends out1's text.
        btn1.click(fn=app1_response, inputs=[image_box], outputs=out1)
        btn2.click(fn=send_SMS, inputs=out1, outputs=out2)
    gr.Markdown("""
# Make a Plan #
- screen capture (Win + shift + S)
- click **Make a Plan** to upload
- await LLM Bot (Gemini, in this case) response
- receive THREE actionable items
[demo](https://youtu.be/lJ4jIAEVRNY)
""")
# "SOS" tab: upload an ERP/inventory screenshot, get the below-ROQ reorder
# list from app2_response, and optionally forward it via send_SMS.
with gr.Blocks() as app2:
    with gr.Column():
        gr.Markdown("## 🥷 Stock-Out Scarebot ##")
        gr.Markdown("```Win+Screenshot, paste ERP Inv ⌨ screenshot here...```")
        # type="filepath" hands the handler a path string rather than pixel data.
        image_box = gr.Image(label="ERP/Inventory screen", type="filepath")
        btn1 = gr.Button("check ROQ 🛒 ")
        out1 = gr.Textbox(label="here is the watch list...")
        btn2 = gr.Button("send out reminders ☑")
        out2 = gr.Textbox(label="response or feed back?")
        # First button fills out1 from the image; second sends out1's text.
        btn1.click(fn=app2_response, inputs=[image_box], outputs=out1)
        btn2.click(fn=send_SMS, inputs=out1, outputs=out2)
    # Help text describes this tab's own workflow (it previously repeated the
    # To-Dos tab's instructions).
    gr.Markdown("""
# Check Stock Levels #
- screen capture (Win + shift + S)
- paste the ERP/inventory screenshot above
- click **check ROQ** and await LLM Bot (Gemini, in this case) response
- receive the list of items below ROQ
[demo](https://youtu.be/lJ4jIAEVRNY)
""")
# Outer shell: a heading followed by one tab per sub-app.
with gr.Blocks() as demo:
    gr.Markdown("## Workflow Bot ##")
    gr.TabbedInterface(
        interface_list=[app1, app2],
        tab_names=["To-Dos", "SOS"],
    )

# Enable request queuing, then start the server.
demo.queue().launch()