I found a way to use this version with both text and image input: pass a fake image ;)
Not a great choice, but I couldn't find another one (see the in-memory placeholder sketch further down).
import torch
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration
from PIL import Image
# 4-bit quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)
# Load the model and the processor with 4-bit quantization
model_id = "mistral-community/pixtral-12b"
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # dtype for the non-quantized modules; the 4-bit layers compute in bnb_4bit_compute_dtype
    device_map="cuda:0",  # force everything onto the 3090
    quantization_config=quantization_config
)
processor = AutoProcessor.from_pretrained(model_id)
# Helper: load local images and preprocess them for the model
def load_and_process_images(img_paths, target_size=(224, 224)):
    images = []
    for img_path in img_paths:
        try:
            img = Image.open(img_path).convert("RGB")  # normalize to RGB
            img = img.resize(target_size)  # resize the image
            images.append(img)
        except Exception as e:
            print(f"Error while processing image {img_path}:", e)
    return images
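# Optional helper (a sketch, not part of the original script): for text-only
# prompts, you can synthesize a blank placeholder image in memory instead of
# keeping a fake_image.jpg on disk.
def make_blank_image(size=(224, 224)):
    return Image.new("RGB", size, color="white")  # plain white placeholder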
# Test 1: describe a local image
try:
    IMG_PATHS = ["robot.jpg"]  # local path of the image
    PROMPT = "[INST]Describe this image.\n[IMG][/INST]"
    images = load_and_process_images(IMG_PATHS)
    inputs = processor(text=PROMPT, images=images, return_tensors="pt").to("cuda:0")
    with torch.no_grad():
        generate_ids = model.generate(**inputs, max_new_tokens=500)
    output_description = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    print("Test 1 - description of a local image:", output_description)
except Exception as e:
    print("Error in Test 1:", e)
# Test 2: answer a text-only question (the image is just a placeholder)
try:
    IMG_PATHS = ["fake_image.jpg"]  # local path of the fake image
    PROMPT = "[INST]What is the capital of France?\n[IMG][/INST]"
    images = load_and_process_images(IMG_PATHS)
    inputs = processor(text=PROMPT, images=images, return_tensors="pt").to("cuda:0")
    with torch.no_grad():
        generate_ids = model.generate(**inputs, max_new_tokens=500)
    output_question = processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    print("Test 2 - question with a fake local image:", output_question)
except Exception as e:
    print("Error in Test 2:", e)