"""app.py

Gradio front end that sends an uploaded receipt image to a Groq-hosted
vision model and renders the extracted line items as a Markdown table.
"""
import base64
from io import BytesIO

import gradio as gr
from groq import Groq

# Initialize Groq client
client = Groq()

# Vision model used for the extraction request.
MODEL_NAME = "llama-3.2-11b-vision-preview"

# Instruction sent alongside the image. Built from adjacent literals so each
# paragraph of the prompt stays readable in source.
RECEIPT_PROMPT = (
    "Your task is to extract key information from the provided receipt image."
    "\n\nONLY reply with a table."
    "\n\nFollow this schema:"
    "\n1. item (str), name of the item"
    "\n2. price (float), price of the item"
    "\n3. quantity (int), quantity of the item"
    "\n4. total (float), total cost for the item."
    "\n\nRemember to just end with the table."
)


def encode_image_to_base64(image):
    """Return *image* encoded as a base64 JPEG string.

    Args:
        image: A PIL image (the Gradio input below is configured with
            ``type="pil"``).

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded to ``str``.
    """
    # JPEG has no alpha channel or palette, so saving e.g. an RGBA or P mode
    # image (typical for PNG uploads) fails. Normalise to RGB first.
    if image.mode != "RGB":
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def extract_receipt_info(image):
    """Ask the vision model for a Markdown table of the receipt's items.

    Args:
        image: The uploaded receipt as a PIL image, or ``None`` when the
            form was submitted without an image.

    Returns:
        The text content of the model's first choice.

    Raises:
        gr.Error: If no image was provided.
    """
    # Gradio passes None when nothing was uploaded; fail with a readable
    # message instead of an AttributeError from the encoder.
    if image is None:
        raise gr.Error("Please upload a receipt image first.")

    base64_image = encode_image_to_base64(image)

    chat_completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": RECEIPT_PROMPT},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            },
            # Prefill the assistant turn with an opening code fence; together
            # with the stop sequence below, the reply ends at the closing
            # fence.
            {"role": "assistant", "content": "```markdown"},
        ],
        temperature=0.1,
        max_tokens=8192,
        top_p=1,
        stop="```",
    )

    return chat_completion.choices[0].message.content


def gradio_app():
    """Build the receipt-extraction interface and launch it."""
    interface = gr.Interface(
        fn=extract_receipt_info,
        inputs=gr.Image(type="pil", label="Upload Receipt Image"),
        outputs=gr.Markdown(),
        title="Receipt Information Extractor",
        description=(
            "Upload a receipt image and the model will extract the items, "
            "quantities, and prices from the receipt."
        ),
    )
    interface.launch()


# Start the Gradio app
if __name__ == "__main__":
    gradio_app()