Spaces:
Running
on
Zero
Running
on
Zero
khang119966
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import torchvision.transforms as T
|
|
10 |
from PIL import Image
|
11 |
from torchvision.transforms.functional import InterpolationMode
|
12 |
from transformers import AutoModel, AutoTokenizer
|
|
|
13 |
|
14 |
from threading import Thread
|
15 |
import re
|
@@ -91,8 +92,31 @@ def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbna
|
|
91 |
processed_images.append(thumbnail_img)
|
92 |
return processed_images
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
def load_image(image_file, input_size=448, max_num=12):
|
95 |
-
image =
|
96 |
print("Image size: ", image.size)
|
97 |
transform = build_transform(input_size=input_size)
|
98 |
images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
|
@@ -107,8 +131,7 @@ model = AutoModel.from_pretrained(
|
|
107 |
trust_remote_code=True,
|
108 |
).eval().cuda()
|
109 |
tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
|
110 |
-
|
111 |
-
|
112 |
@spaces.GPU
|
113 |
def chat(message, history):
|
114 |
print("history",history)
|
@@ -133,7 +156,7 @@ We currently only support one image at the start of the context! Please start a
|
|
133 |
pixel_values = None
|
134 |
|
135 |
|
136 |
-
generation_config = dict(max_new_tokens=
|
137 |
|
138 |
if len(history) == 0:
|
139 |
if pixel_values is not None:
|
|
|
10 |
from PIL import Image
|
11 |
from torchvision.transforms.functional import InterpolationMode
|
12 |
from transformers import AutoModel, AutoTokenizer
|
13 |
+
from PIL import Image, ExifTags
|
14 |
|
15 |
from threading import Thread
|
16 |
import re
|
|
|
92 |
processed_images.append(thumbnail_img)
|
93 |
return processed_images
|
94 |
|
95 |
+
def correct_image_orientation(image_path):
    """Open an image and apply its EXIF orientation so the pixels are upright.

    Cameras and phones often store the sensor data unrotated and record the
    intended display orientation in the EXIF ``Orientation`` tag. This helper
    bakes that orientation into the pixel data.

    Args:
        image_path: Anything accepted by ``PIL.Image.open`` (the caller passes
            the uploaded image file).

    Returns:
        A ``PIL.Image.Image``. If the orientation metadata is missing, the
        image is returned without any rotation. If processing the metadata
        fails, the image is returned exactly as opened.

    Raises:
        Whatever ``PIL.Image.open`` raises when the file cannot be opened or
        identified; only the EXIF handling is best-effort.
    """
    # Function-scope import: the module only imports Image and ExifTags from
    # PIL at the top of the file.
    from PIL import ImageOps

    # Open the image.
    image = Image.open(image_path)

    try:
        # exif_transpose covers all eight EXIF orientation values, including
        # the mirrored ones (2, 4, 5, 7), and reads the metadata through
        # Pillow's public API instead of the private, format-specific
        # `_getexif()` method.
        image = ImageOps.exif_transpose(image)
    except Exception as e:
        # Deliberately broad: malformed EXIF data can fail in many ways, and
        # a bad metadata block should never prevent the image from being used.
        print("Không thể xử lý Exif:", e)

    return image
|
117 |
+
|
118 |
def load_image(image_file, input_size=448, max_num=12):
|
119 |
+
image = correct_image_orientation(image_file).convert('RGB')
|
120 |
print("Image size: ", image.size)
|
121 |
transform = build_transform(input_size=input_size)
|
122 |
images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
|
|
|
131 |
trust_remote_code=True,
|
132 |
).eval().cuda()
|
133 |
tokenizer = AutoTokenizer.from_pretrained("5CD-AI/Vintern-1B-v3_5", trust_remote_code=True, use_fast=False)
|
134 |
+
|
|
|
135 |
@spaces.GPU
|
136 |
def chat(message, history):
|
137 |
print("history",history)
|
|
|
156 |
pixel_values = None
|
157 |
|
158 |
|
159 |
+
generation_config = dict(max_new_tokens= 700, do_sample=False, num_beams = 3, repetition_penalty=2.5)
|
160 |
|
161 |
if len(history) == 0:
|
162 |
if pixel_values is not None:
|