#!/usr/bin/env python
# encoding: utf-8
"""Gradio demo that asks MiniCPM-V-2_6 to estimate calcium in a food image.

The script loads the vision-language model once at start-up, then serves a
single-page UI: the user uploads one picture and receives the model's
answer (food name, estimated quantity, estimated calcium in milligrams).
"""
# NOTE(review): the original import order is kept on purpose. `spaces` is
# imported before `torch`; presumably ZeroGPU needs that -- confirm before
# regrouping these imports.
import spaces
import torch
import argparse
from transformers import AutoModel, AutoTokenizer
import gradio as gr
from PIL import Image
import os
import traceback

# Argparser
parser = argparse.ArgumentParser(description='Food Calcium Analysis Demo')
# `choices` rejects unsupported devices with a usage error; unlike an
# `assert`, the check still runs under `python -O`.
parser.add_argument('--device', type=str, default='cuda',
                    choices=['cuda', 'mps'], help='cuda or mps')
args = parser.parse_args()
device = args.device

# Load model
model_path = 'openbmb/MiniCPM-V-2_6'
model = AutoModel.from_pretrained(model_path, trust_remote_code=True,
                                  torch_dtype=torch.bfloat16)
model = model.to(device=device)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model.eval()

ERROR_MSG = "Error, please retry"
model_name = 'Food Calcium Analyzer'


def encode_image(image):
    """Return *image* as a PIL image whose longest side is at most 448*16 px.

    Args:
        image: A ``PIL.Image.Image``, or anything ``Image.open`` accepts
            (such inputs are opened and converted to RGB).

    Returns:
        The image itself when it already fits, otherwise a bicubically
        resized copy that keeps the aspect ratio.
    """
    if not isinstance(image, Image.Image):
        image = Image.open(image).convert("RGB")

    max_size = 448 * 16
    if max(image.size) > max_size:
        w, h = image.size
        # Clamp the shorter side to >= 1: for extreme aspect ratios the
        # truncating int() would otherwise yield 0 and make resize() fail.
        if w > h:
            new_w, new_h = max_size, max(1, int(h * max_size / w))
        else:
            new_h, new_w = max_size, max(1, int(w * max_size / h))
        image = image.resize((new_w, new_h), resample=Image.BICUBIC)
    return image


@spaces.GPU(duration=120)
def chat(image, prompt, params=None):
    """Ask the model about *image* and return the filtered answer.

    Args:
        image: Image passed through ``encode_image`` before being sent.
        prompt: User text appended after the built-in system prompt.
        params: Optional dict of extra keyword arguments forwarded to
            ``model.chat``; ``None`` means no extra arguments.

    Returns:
        The answer after ``process_answer``, or ``ERROR_MSG`` if anything
        raised while preparing the request or running the model.
    """
    # `**None` raises TypeError, so normalise the default to an empty dict.
    params = {} if params is None else params
    try:
        system_prompt = (
            "You are an AI assistant specialized in analyzing food images and "
            "estimating calcium content. For the given image, provide the "
            "following information: 1) Name of the food, 2) Estimated quantity "
            "(e.g., 1 plate, 100 grams), 3) Estimated calcium content in "
            "milligrams. Be as accurate and specific as possible."
        )
        user_message = f"{system_prompt}\n\nUser: {prompt}"
        # The image travels inside the message content, hence image=None below.
        messages = [
            {"role": "user", "content": [encode_image(image), user_message]}
        ]
        answer = model.chat(
            image=None,
            msgs=messages,
            tokenizer=tokenizer,
            **params
        )
        return process_answer(answer)
    except Exception as e:
        # UI boundary: log the failure and show a generic message instead of
        # propagating the exception to the caller.
        print(e)
        traceback.print_exc()
        return ERROR_MSG


def process_answer(answer):
    """Keep only the ``Food:``/``Quantity:``/``Calcium:`` lines of *answer*.

    Returns the matching lines joined by newlines, or *answer* unchanged
    when no line starts with one of those labels.
    """
    formatted_lines = [
        line
        for line in answer.split('\n')
        if line.startswith(("Food:", "Quantity:", "Calcium:"))
    ]
    return "\n".join(formatted_lines) if formatted_lines else answer


def analyze_food(image):
    """Button callback: run the analysis for the uploaded *image*.

    Returns a prompt to upload an image when *image* is ``None``, otherwise
    whatever ``chat`` returns for the fixed analysis prompt.
    """
    if image is None:
        return "Please upload a food image."

    # Generation settings forwarded to model.chat via chat().
    params = {
        'sampling': True,
        'top_p': 0.8,
        'top_k': 100,
        'temperature': 0.7,
        'repetition_penalty': 1.05,
        "max_new_tokens": 2048,
        "max_inp_length": 4352
    }

    result = chat(image, "Analyze this food image", params)
    return result


css = """
.example label {
    font-size: 16px;
}
"""

introduction = """
## Food Calcium Analyzer

This app analyzes a food image to estimate its calcium content.

Features:
1. Upload a single food image
2. Get food name, estimated quantity, and calcium content
3. Accurate analysis using AI

Upload your food image below to start the analysis.
"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(value=introduction)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Food Image")
            analyze_button = gr.Button("Analyze Food")

        with gr.Column():
            result_output = gr.Textbox(label="Analysis Result", lines=10)

    analyze_button.click(analyze_food, inputs=[image_input], outputs=[result_output])

    with gr.Accordion("How to use", open=False):
        gr.Markdown("""
        1. Click on the 'Upload Food Image' area or drag and drop your image.
        2. Once the image is uploaded, click the 'Analyze Food' button.
        3. Wait for the AI to process the image.
        4. View the results, which include:
           - Name of the food
           - Estimated quantity
           - Estimated calcium content in milligrams

        Note: For best results, use a clear image of a single food item or meal.
        """)

demo.launch()