Spaces:

hgdgng
/

HG_Llama3.2

Runtime error

App Files Files Community

HG_Llama3.2 / app.py

hgdgng

Update app.py

903f1a6 verified about 2 months ago

raw

history blame

1.61 kB

	# Demo script: download one image, ask Llama 3.2 Vision to describe it,
	# and print the generated answer.
	import requests
	import torch
	from PIL import Image
	# Llama 3.2 Vision checkpoints are served by the "Mllama" architecture;
	# transformers has no class named LlamaForConditionalGeneration, so the
	# previous import failed at startup.
	from transformers import MllamaForConditionalGeneration, AutoProcessor

	# Define the model ID, replace with the correct ID if needed
	model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

	# Load the model in bfloat16 (change to torch.float16 if the hardware
	# does not support bfloat16)
	model = MllamaForConditionalGeneration.from_pretrained(
	    model_id,
	    torch_dtype=torch.bfloat16,
	    device_map="auto",  # Automatically selects the appropriate device
	)

	# Load the processor
	processor = AutoProcessor.from_pretrained(model_id)

	# Define an image URL
	url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"

	# Fetch the image using requests. The timeout prevents an indefinite hang
	# on a stalled connection, and raise_for_status() surfaces HTTP errors
	# instead of handing an error page to PIL.
	response = requests.get(url, stream=True, timeout=30)
	response.raise_for_status()
	image = Image.open(response.raw)

	# Define the messages in a format the model understands (adjust as needed)
	messages = [
	    {
	        "role": "user",
	        "content": [
	            {"type": "image"},  # This indicates that the input contains an image
	            {"type": "text", "text": "Can you please describe this image in one sentence?"},
	        ],
	    }
	]

	# Generate input text with the processor
	input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

	# Process the image and input text, prepare them for the model.
	# add_special_tokens=False follows the model card's usage example: the
	# chat template output already carries the special tokens, so the
	# tokenizer must not add them a second time.
	inputs = processor(
	    image,
	    input_text,
	    add_special_tokens=False,
	    return_tensors="pt",
	).to(model.device)

	# Run the model to generate a response
	output = model.generate(**inputs, max_new_tokens=70)

	# Decode and print only the newly generated tokens (everything after the
	# prompt, whose length is the last dimension of input_ids)
	prompt_length = inputs["input_ids"].shape[-1]
	print(processor.decode(output[0][prompt_length:]))