PixDiet

Running on Zero

App Files Files Community

PixDiet / app.py

blanchon

Fix messages

14a9542 verified 16 days ago

raw

history blame contribute delete

9.94 kB

	import spaces
	from transformers import (
	TextIteratorStreamer,
	)
	from transformers import (
	AutoProcessor,
	BitsAndBytesConfig,
	LlavaForConditionalGeneration,
	)
	from PIL import Image
	import gradio as gr
	from threading import Thread
	from dotenv import load_dotenv

	# Add these imports
	from datetime import datetime
	import pytz
	from typing import Optional
	from transformers import AutoModelForCausalLM, CodeGenTokenizerFast as Tokenizer
	import torch
	from theme import Seafoam


	# Read variables from a .env file into the process environment.
	load_dotenv()

	# When True, a stand-in model ("vikhyatk/moondream1") is loaded instead of
	# the PixDiet checkpoint below.
	TESTING = False

	# Checkpoint loaded when TESTING is off.
	# model_id = "mistral-community/pixtral-12b"
	model_id = "blanchon/PixDiet-pixtral-nutrition-v2"

	# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
	bnb_config = BitsAndBytesConfig(
	    bnb_4bit_compute_dtype=torch.bfloat16,
	    bnb_4bit_quant_type="nf4",
	    bnb_4bit_use_double_quant=True,
	    load_in_4bit=True,
	)

	# Create the module-level `model` and `processor` for the selected mode.
	if not TESTING:
	    model = LlavaForConditionalGeneration.from_pretrained(
	        model_id,
	        quantization_config=bnb_config,
	        torch_dtype=torch.bfloat16,
	        device_map="auto",
	    )
	    processor = AutoProcessor.from_pretrained(model_id)
	else:
	    # Testing mode swaps the checkpoint; here `processor` is a plain tokenizer.
	    model_id = "vikhyatk/moondream1"
	    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
	    processor = Tokenizer.from_pretrained(model_id)

	# Jinja chat template used by `processor.apply_chat_template`.
	#   * user turns are wrapped in `<s>[INST]` ... `[/INST]`; each text item is
	#     emitted verbatim and each image item becomes a newline plus `[IMG]`.
	#   * assistant turns emit their text items followed by `</s>`.
	#
	# Only the indentation in front of each template line is stripped. Removing
	# every space (as `.replace(" ", "")` would) also deletes the spaces inside
	# the Jinja tags -- e.g. `{%- for message in messages %}` would become
	# `{%-formessageinmessages%}` -- which is not a valid template.
	#
	# NOTE(review): placed at module level, so it also runs when TESTING is True,
	# where `processor` comes from `Tokenizer.from_pretrained` -- confirm that
	# object supports `.chat_template` / `.tokenizer` before enabling TESTING.
	processor.chat_template = "\n".join(
	    line.lstrip()
	    for line in """
	{%- for message in messages %}
	{%- if message.role == "user" %}
	<s>[INST]
	{%- for item in message.content %}
	{%- if item.type == "text" %}
	{{ item.text }}
	{%- elif item.type == "image" %}
	\n[IMG]
	{%- endif %}
	{%- endfor %}
	[/INST]
	{%- elif message.role == "assistant" %}
	{%- for item in message.content %}
	{%- if item.type == "text" %}
	{{ item.text }}
	{%- endif %}
	{%- endfor %}
	</s>
	{%- endif %}
	{%- endfor %}
	""".splitlines()
	)

	# Reuse the end-of-sequence token as the padding token.
	processor.tokenizer.pad_token = processor.tokenizer.eos_token


	@spaces.GPU
	def bot_streaming(chatbot, image_input, max_new_tokens=250):
	    """Generate the assistant's answer for the newest chat turn and stream it.

	    Builds a single user message (a fixed nutrition prompt plus the optional
	    image), runs ``model.generate`` on a background thread, and yields the
	    chat history every time a new chunk of text arrives from the streamer.

	    Args:
	        chatbot: Chat history as a sequence of ``[user_text, bot_text]``
	            pairs. The second slot of the last pair is overwritten in place
	            with the partial reply, so that pair must support item assignment.
	        image_input: Meal picture as a ``PIL.Image.Image``, any object
	            accepted by ``Image.fromarray``, or ``None`` for a text-only
	            prompt.
	        max_new_tokens: Forwarded to ``model.generate``.

	    Yields:
	        ``chatbot`` after each streamed chunk has been appended to the reply.
	    """
	    # Current time in the Paris timezone, formatted as a 12-hour clock
	    # ("%I:%M%p").
	    paris_tz = pytz.timezone("Europe/Paris")
	    current_time = datetime.now(paris_tz).strftime("%I:%M%p")

	    # NOTE(review): the text the user typed (chatbot[-1][0]) is NOT forwarded
	    # to the model. The previous code selected this same fixed prompt in both
	    # branches of an `if text_input != ""` check; that behaviour is preserved
	    # here -- confirm whether the user's question should be included.
	    text_input = (
	        f"Current time: {current_time}. You are a nutrition expert. "
	        "Identify the food/ingredients in this image. Is this a healthy meal? "
	        "Can you think of how to improve it?"
	    )

	    # Build the single user message; the image item is only added when an
	    # image was supplied.
	    content = [{"type": "text", "text": text_input}]
	    images = []
	    if image_input is not None:
	        if isinstance(image_input, Image.Image):
	            image = image_input
	        else:
	            image = Image.fromarray(image_input)
	        images.append(image.convert("RGB"))
	        content.append({"type": "image"})
	    messages = [{"role": "user", "content": content}]

	    # Render the prompt text from the chat template.
	    texts = processor.apply_chat_template(messages)

	    # Tokenize (and preprocess the image, if any) and move tensors to the GPU.
	    if images:
	        inputs = processor(text=texts, images=images, return_tensors="pt").to("cuda")
	    else:
	        inputs = processor(text=texts, return_tensors="pt").to("cuda")

	    streamer = TextIteratorStreamer(
	        processor.tokenizer, skip_special_tokens=True, skip_prompt=True
	    )

	    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)

	    # Generation runs on a worker thread so this generator can consume the
	    # streamer and update the UI while tokens are still being produced.
	    thread = Thread(target=model.generate, kwargs=generation_kwargs)
	    thread.start()

	    response = ""
	    for new_text in streamer:
	        response += new_text
	        chatbot[-1][1] = response
	        yield chatbot

	    thread.join()

	    # Debug output: dump the earlier turns and the turn just answered.
	    # (Separator lines restored; `print("" 60)` was not valid Python.)
	    print("*" * 60)
	    print("*" * 60)
	    print("BOT_STREAMING_CONV_START")
	    for i, (request, answer) in enumerate(chatbot[:-1], 1):
	        print(f"Q{i}:\n {request}")
	        print(f"A{i}:\n {answer}")
	    print("New_Q:\n", text_input)
	    print("New_A:\n", response)
	    print("BOT_STREAMING_CONV_END")


	# Instance of the Seafoam theme (imported from the local `theme` module);
	# it is passed to gr.Blocks as the app theme.
	seafoam = Seafoam()

	# Header markup rendered first in the page via gr.HTML: the app title,
	# a short description, and a row with two logo images.
	html = """
	<!-- Foreground content -->
	<p align="center" style="font-size: 2.5em; line-height: 1; ">
	<span style="display: inline-block; vertical-align: middle;">🍽️</span>
	<span style="display: inline-block; vertical-align: middle;">PixDiet</span>
	</p>
	<center>
	<font size=3><b>PixDiet</b> is your AI nutrition expert. Upload an image of your meal and chat with our AI to get personalized advice on your diet, meal composition, and ways to improve your nutrition.</font>
	</center>
	<!-- Background image positioned behind everything -->
	<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; margin-top: 20px; width: 100%;">
	<div style="display: flex; justify-content: center; width: 100%;">
	<img src="https://dropshare.blanchon.xyz/public/dropshare/alan.png" alt="Alan AI Logo" style="height: 50px; margin-right: 20px;">
	<img src="https://dropshare.blanchon.xyz/public/dropshare/mistral-ai-icon-logo-B3319DCA6B-seeklogo.com.png" alt="Mistral AI Logo" style="height: 50px;">
	</div>
	</div>
	"""

	# Footer markup rendered last in the page via gr.HTML: a banner image
	# followed by the hackathon credit line.
	footer_html = """
	<!-- Footer content -->
	<div style="display: flex; flex-direction: column; justify-content: center; align-items: center; margin-top: 20px; width: 100%;">
	<div style="display: flex; justify-content: center; width: 100%;">
	<img src="https://dropshare.blanchon.xyz/public/dropshare//VariantVariant6-Photoroom.png" alt="Background Image"
	style="height: 100px; width: 100%; object-fit: scale-down;">
	</div>
	<div>
	Made with ❤️ during the Mistral AI x Alan Hackathon.
	</div>

	</div>
	"""


	# Math delimiter pairs given to the chatbot component: inline \( ... \) first,
	# then the display-mode \begin{...}/\end{...} environments, then \[ ... \].
	latex_delimiters_set = [
	    dict(left="\\(", right="\\)", display=False),
	    *(
	        dict(left=f"\\begin{{{env}}}", right=f"\\end{{{env}}}", display=True)
	        for env in ("equation", "align", "alignat", "gather", "CD")
	    ),
	    dict(left="\\[", right="\\]", display=True),
	]

	# Assemble the Gradio UI: header, a two-column row (inputs on the left, chat
	# on the right), the event wiring, and the footer.
	# NOTE(review): the nesting below is reconstructed from a source whose
	# indentation was flattened -- confirm the column/row membership.
	with gr.Blocks(
	    css="footer{display:none !important}",
	    theme=seafoam,
	    title="PixDiet",
	) as demo:
	    gr.HTML(html)

	    with gr.Row():
	        # Left column: free-text profile, meal photo, and clickable examples.
	        with gr.Column(scale=3):
	            # NOTE(review): `about_you` is filled by the examples but is not
	            # an input of any event handler wired below.
	            about_you = gr.Textbox(
	                interactive=True,
	                lines=3,
	                placeholder="Add information about you here...",
	                label="About you",
	            )
	            image_input = gr.Image(
	                type="pil",
	                height=350,
	                label="Upload your meal image",
	            )
	            gr.Examples(
	                inputs=[image_input, about_you],
	                examples=[
	                    [
	                        "./examples/mistral_breakfast.jpeg",
	                        "John, 45 years old, 80kg, lactose intolerant. Training for his first triathlon.",
	                    ],
	                    [
	                        "./examples/mistral_desert.jpeg",
	                        "Emma, 26 years old, 55kg, iron deficiency. Training for her first Ironman competition.",
	                    ],
	                    [
	                        "./examples/mistral_snacks.jpeg",
	                        "Paul, 34 years old, 62kg, no known pathologies. Focused on improving strength for weightlifting competitions.",
	                    ],
	                    [
	                        "./examples/mistral_pasta.jpeg",
	                        "Carla, 52 years old, 58kg, no known pathologies. Currently training for her first marathon.",
	                    ],
	                ],
	            )

	        # Right column: conversation panel, message box, and action buttons.
	        with gr.Column(scale=7):
	            chatbot = gr.Chatbot(
	                type=None,
	                latex_delimiters=latex_delimiters_set,
	                show_copy_button=True,
	                height=700,
	                layout="panel",
	                label="Chat with PixDiet",
	            )
	            text_input = gr.Textbox(
	                interactive=True,
	                container=False,
	                lines=1,
	                placeholder="(Optional) Enter your message here...",
	                label="Ask about your meal",
	            )
	            with gr.Row():
	                send_btn = gr.Button("Send", visible=True, variant="primary")
	                clear_btn = gr.Button(
	                    "Delete my history",
	                    visible=True,
	                    variant="stop",
	                )

	    def submit_chat(chatbot, text_input):
	        """Open a new turn in the history and clear the message box.

	        Appends ``(text_input, "")`` to ``chatbot`` and returns the history
	        together with an empty string for the textbox.
	        """
	        chatbot.append((text_input, ""))
	        return chatbot, ""

	    def clear_chat():
	        """Return reset values for the chat history, the image, and the textbox."""
	        return [], None, ""

	    # Both the Send button and pressing Enter in the textbox first record the
	    # turn, then stream the model's answer into the chat panel.
	    send_click_event = send_btn.click(
	        fn=submit_chat,
	        inputs=[chatbot, text_input],
	        outputs=[chatbot, text_input],
	    ).then(fn=bot_streaming, inputs=[chatbot, image_input], outputs=chatbot)
	    submit_event = text_input.submit(
	        fn=submit_chat,
	        inputs=[chatbot, text_input],
	        outputs=[chatbot, text_input],
	    ).then(fn=bot_streaming, inputs=[chatbot, image_input], outputs=chatbot)
	    clear_btn.click(fn=clear_chat, outputs=[chatbot, image_input, text_input])

	    gr.HTML(footer_html)

	# Launch the app only when this file is executed directly, not on import.
	if __name__ == "__main__":
	    demo.launch(
	        show_api=False,
	        share=False,
	        debug=False,
	    )