PHI35VISION

Runtime error

App Files Files Community

PHI35VISION / app.py

aiqtech

Update app.py

c462fef verified 17 days ago

raw

history blame

2.22 kB

	import spaces
	import os
	import time
	import torch
	import gradio as gr
	from threading import Thread
	from PIL import Image

	# Install required packages
	import subprocess
	subprocess.run('pip install --upgrade transformers', shell=True)
	subprocess.run('pip install accelerate', shell=True)

	from transformers import AutoProcessor, AutoModelForVisionText2Text

	# Model and processor initialization with trust_remote_code=True
	processor = AutoProcessor.from_pretrained(
	"Qwen/QVQ-72B-Preview",
	trust_remote_code=True
	)

	model = AutoModelForVisionText2Text.from_pretrained(
	"Qwen/QVQ-72B-Preview",
	trust_remote_code=True,
	device_map="auto"
	).eval()

	# Footer
	footer = """
	<div style="text-align: center; margin-top: 20px;">
	<p>Powered by QVQ-72B Model</p>
	</div>
	"""

	# Vision model function
	@spaces.GPU()
	def process_image(image, text_input=None):
	try:
	# Convert image to PIL format
	image = Image.fromarray(image).convert("RGB")

	# Prepare inputs
	if text_input:
	inputs = processor(text=text_input, images=image, return_tensors="pt")
	else:
	inputs = processor(images=image, return_tensors="pt")

	# Move inputs to the same device as the model
	inputs = {k: v.to(model.device) for k, v in inputs.items()}

	# Generate output
	outputs = model.generate(**inputs, max_new_tokens=1000)

	# Decode response
	response = processor.batch_decode(outputs, skip_special_tokens=True)[0]

	return response
	except Exception as e:
	return f"Error processing image: {str(e)}"

	# CSS styling
	css = """
	footer {
	visibility: hidden;
	}
	"""

	# Gradio interface
	with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
	with gr.Row():
	input_img = gr.Image(label="Input Image")
	with gr.Row():
	text_input = gr.Textbox(label="Question (Optional)")
	with gr.Row():
	submit_btn = gr.Button(value="Submit")
	with gr.Row():
	output_text = gr.Textbox(label="Response")

	submit_btn.click(process_image, [input_img, text_input], [output_text])

	gr.HTML(footer)

	# Launch the app
	demo.launch(debug=True)