Vintern-1B-v3.5-Demo

Running on Zero

App Files Files Community

khang119966 committed 2 days ago

Commit

8d20c43

verified ·

1 Parent(s): faa07dc

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -4

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import torchvision.transforms as T
 from PIL import Image
 from torchvision.transforms.functional import InterpolationMode
 from transformers import AutoModel, AutoTokenizer
 from threading import Thread
 import re
@@ -91,8 +92,31 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
         processed_images.append(thumbnail_img)
     return processed_images
 def load_image(image_file, input_size=448, max_num=12):
-    image = Image.open(image_file).convert('RGB')
     print("Image size: ", image.size)
     transform = build_transform(input_size=input_size)
     images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
@@ -107,8 +131,7 @@ model = AutoModel.from_pretrained(
     trust_remote_code=True,
 ).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
 @spaces.GPU
 def chat(message, history):
     print("history",history)
@@ -133,7 +156,7 @@ We currently only support one image at the start of the context! Please start a
         pixel_values = None
-    generation_config = dict(max_new_tokens= 1024, do_sample=False, num_beams = 3, repetition_penalty=1.5)
     if len(history) == 0:
         if pixel_values is not None:

 from PIL import Image
 from torchvision.transforms.functional import InterpolationMode
 from transformers import AutoModel, AutoTokenizer
+from PIL import Image, ExifTags
 from threading import Thread
 import re
         processed_images.append(thumbnail_img)
     return processed_images
def correct_image_orientation(image_path):
    """Open an image and apply the orientation recorded in its EXIF data.

    Photos taken on phones and cameras are often stored unrotated, with the
    intended display orientation kept in the EXIF ``Orientation`` tag. This
    helper returns the image transposed so that its pixels match that
    orientation.

    Args:
        image_path: Anything accepted by ``PIL.Image.open`` (a path or a
            file-like object).

    Returns:
        The orientation-corrected ``PIL.Image.Image``. If the EXIF data
        cannot be processed, the image is returned exactly as opened.

    Raises:
        Whatever ``PIL.Image.open`` raises when the file cannot be opened
        or identified; only EXIF handling is best-effort.
    """
    # Imported locally because the module-level imports visible in this file
    # only bring in ``Image`` and ``ExifTags`` from PIL.
    from PIL import ImageOps

    # Open the image.
    image = Image.open(image_path)

    try:
        # exif_transpose covers every EXIF Orientation value, including the
        # mirrored ones (2, 4, 5, 7) that a rotate-only fix would skip, and
        # it uses the public EXIF API instead of the private ``_getexif``.
        return ImageOps.exif_transpose(image)
    except Exception as e:
        # Best effort: a damaged EXIF block must not prevent the image from
        # being used, so report the problem and fall back to the raw image.
        print("Không thể xử lý Exif:", e)
        return image
 def load_image(image_file, input_size=448, max_num=12):
+    image = correct_image_orientation(image_file).convert('RGB')
     print("Image size: ", image.size)
     transform = build_transform(input_size=input_size)
     images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
     trust_remote_code=True,
 ).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
 @spaces.GPU
 def chat(message, history):
     print("history",history)
         pixel_values = None
+    generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.5)
     if len(history) == 0:
         if pixel_values is not None: