Find a solution to use this version with text and image: fake_image ;)

#17
by martossien - opened

Not the best choice, but I couldn't find another one.

```python
import torch
from PIL import Image
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaForConditionalGeneration

# 4-bit quantization configuration
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the model and processor with 4-bit quantization
model_id = "mistral-community/pixtral-12b"
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="cuda:0",  # force everything onto the 3090
    quantization_config=quantization_config,
)
processor = AutoProcessor.from_pretrained(model_id)
```
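
As a quick sanity check you can confirm the 4-bit load actually fits on the 3090; a minimal sketch, assuming a recent transformers release where `get_memory_footprint()` is available:

```python
# Optional: report the quantized model's approximate size in memory.
# get_memory_footprint() is provided by transformers on recent releases.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
```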

```python
# Helper to load and preprocess local images.
# Note: the Pixtral processor resizes images itself, so the manual resize
# below is probably optional and may even throw away detail.
def load_and_process_images(img_paths, target_size=(224, 224)):
    images = []
    for img_path in img_paths:
        try:
            img = Image.open(img_path).convert("RGB")  # convert to RGB
            img = img.resize(target_size)              # resize the image
            images.append(img)
        except Exception as e:
            print(f"Error while processing image {img_path}:", e)
    return images
```

```python
# Test 1: describe a local image
try:
    IMG_PATHS = ["robot.jpg"]  # local path to the image
    PROMPT = "[INST]Describe this image.\n[IMG][/INST]"

    images = load_and_process_images(IMG_PATHS)
    inputs = processor(text=PROMPT, images=images, return_tensors="pt").to("cuda:0")

    with torch.no_grad():
        generate_ids = model.generate(**inputs, max_new_tokens=500)
        output_description = processor.batch_decode(
            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]

    print("Test 1 - description of a local image:", output_description)
except Exception as e:
    print("Error in Test 1:", e)
```

```python
# Test 2: answer a question, using a placeholder image
try:
    IMG_PATHS = ["fake_image.jpg"]  # local path to the image
    PROMPT = "[INST]What is the capital of France?\n[IMG][/INST]"

    images = load_and_process_images(IMG_PATHS)
    inputs = processor(text=PROMPT, images=images, return_tensors="pt").to("cuda:0")

    with torch.no_grad():
        generate_ids = model.generate(**inputs, max_new_tokens=500)
        output_question = processor.batch_decode(
            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]

    print("Test 2 - question with a local image:", output_question)
except Exception as e:
    print("Error in Test 2:", e)
```
