Commit 7df19dd by VictorSanh
Parent(s): f10b974

Update visualization

Files changed:
- app_bis.py +7 -9
- app_dialogue.py +32 -71
app_bis.py
CHANGED
@@ -1,7 +1,6 @@
 import logging
 import os
 import re
-
 import time
 from io import BytesIO

@@ -10,7 +9,6 @@ import requests
 import torch
 import transformers
 from accelerate.utils import get_max_memory
-
 from joblib import Parallel, delayed
 from PIL import Image
 from transformers import AutoTokenizer
@@ -699,17 +697,17 @@ with gr.Blocks() as demo:
     converted into real newline characters.
     See examples and additional details below.""")

-    #gr.HTML("<h3 align='center'>Help to write prompts:🙌</h3><br>Put the urls to the images inside the image tokens, it will be converted into the real image tokens. Put <fake_token_around_image> before and after each image token WITHOUT space. The texts \\n will be converted into real newline characters. See examples and additional details below.")
-    #gr.Markdown(MSG_MAIN)
-    #with gr.Row():
-    #with gr.Column():
+    # gr.HTML("<h3 align='center'>Help to write prompts:🙌</h3><br>Put the urls to the images inside the image tokens, it will be converted into the real image tokens. Put <fake_token_around_image> before and after each image token WITHOUT space. The texts \\n will be converted into real newline characters. See examples and additional details below.")
+    # gr.Markdown(MSG_MAIN)
+    # with gr.Row():
+    # with gr.Column():
     gr.Markdown("## Input")
     with gr.Row():
         if not IS_MAIN_SPACE:
             images = gr.File(label="Images", file_count="multiple")
         prompt = gr.Textbox(label="Prompt", placeholder="Enter the prompt here", lines=5)

-    #gr.Markdown("## Common parameters to all decoding strategy")
+    # gr.Markdown("## Common parameters to all decoding strategy")
     with gr.Row():
         with gr.Accordion("Common parameters to all decoding strategy", open=False, elem_id="common_params"):
             temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Softmax temperature")
@@ -751,7 +749,7 @@
                 label="Stop generation when an image token, a bos or a eos token is generated", value=False
             )

-    #gr.Markdown("## Decoding strategy and its specific parameters")
+    # gr.Markdown("## Decoding strategy and its specific parameters")
     with gr.Accordion("Decoding strategy and its specific parameters", open=False, elem_id="decoding_params"):
         decoding_strategy = gr.Dropdown(
             ["greedy", "beam_search", "beam_sampling", "sampling_top_k", "sampling_top_p", "contrastive_sampling"],
@@ -793,7 +791,7 @@

     submit = gr.Button(label="Generate")

-    #with gr.Column():
+    # with gr.Column():
     with gr.Row():
         if IS_MAIN_SPACE:
             outputs = [
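app_bis.py only receives whitespace tweaks in this commit (two blank lines dropped from the imports, a space added after "#" in commented-out code). For orientation, here is a minimal sketch of the kind of gr.Blocks layout these hunks sit in; the wiring and the generate stub are illustrative placeholders, not the Space's actual code:

import gradio as gr

def generate(prompt, temperature, decoding_strategy):
    # Placeholder: the real app calls the model with these parameters.
    return f"[{decoding_strategy}, T={temperature}] {prompt}"

with gr.Blocks() as demo:
    gr.Markdown("## Input")
    prompt = gr.Textbox(label="Prompt", placeholder="Enter the prompt here", lines=5)

    with gr.Accordion("Common parameters to all decoding strategy", open=False):
        temperature = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=1.0, label="Softmax temperature")

    with gr.Accordion("Decoding strategy and its specific parameters", open=False):
        decoding_strategy = gr.Dropdown(
            ["greedy", "beam_search", "beam_sampling", "sampling_top_k", "sampling_top_p", "contrastive_sampling"],
            value="greedy",
            label="Decoding strategy",
        )

    submit = gr.Button("Generate")
    output = gr.Textbox(label="Output")
    submit.click(generate, inputs=[prompt, temperature, decoding_strategy], outputs=output)

demo.launch()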
app_dialogue.py
CHANGED
@@ -1,12 +1,11 @@
 import os

 import gradio as gr
-import requests


 models = [
-    "HuggingFaceM4/
-    # "HuggingFaceM4/
+    "HuggingFaceM4/idefics-9b-instruct",
+    # "HuggingFaceM4/idefics-80b-instruct",
 ]

 SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
@@ -31,18 +30,9 @@ BAN_TOKENS = "<image>;<fake_token_around_image>"
 EOS_TOKENS = "</s>;User"

 import logging
-import re
-from io import BytesIO

-import torch
 from accelerate.utils import get_max_memory
-from
-from transformers import AutoTokenizer
-
-from m4.models.vllama.configuration_vllama import VLlamaConfig
-from m4.models.vllama.modeling_vllama import VLlamaForCausalLM
-from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
-from m4.training.utils import build_image_transform
+from transformers import AutoTokenizer, AutoProcessor, AutoConfig, AutoModelForCausalLM


 TOKENIZER_FAST = True
@@ -52,7 +42,12 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()


-def load_tokenizer_model(model_name):
+def load_processor_tokenizer_model(model_name):
+    processor = AutoProcessor.from_pretrained(
+        model_name,
+        use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
+        truncation_side="left",
+    )
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         use_fast=TOKENIZER_FAST,
@@ -61,7 +56,7 @@ def load_tokenizer_model(model_name):
     )
     # tokenizer.padding_side = "left" -> we don't need that, do we?

-    config =
+    config = AutoConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
     max_memory_map = get_max_memory()

     for key in max_memory_map.keys():
@@ -71,7 +66,7 @@ def load_tokenizer_model(model_name):
         # Decrease 2 for Pytorch overhead and 2 for the forward to be safe
         max_memory_map[key] = f"{max_memory_map[key] - 4} GiB"

-    model =
+    model = AutoModelForCausalLM.from_pretrained(
         model_name,
         use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
         device_map="auto",
@@ -83,28 +78,23 @@ def load_tokenizer_model(model_name):
     print("Current device map:", model.hf_device_map)
     print("Model default generation config:", model.generation_config)
     # TODO: the device_map looks very inefficien right now. that could be improved
-    return tokenizer, model
+    return processor, tokenizer, model


-def fetch_images(url_images):
-
-
-
-
-    )
-
-    images = []
-    for url in url_images:
-        if isinstance(url, str):
-            images.append(Image.open(BytesIO(requests.get(url, stream=True, headers=headers).content)))
+def split_prompt_into_list(prompt_str):
+    """Convert a full string prompt to the list format expected by the processor."""
+    prompt_splitted = prompt_str.split("<fake_token_around_image>")
+    prompt_list = []
+    for ps in prompt_splitted:
+        if ps.startswith("<image:"):
+            prompt_list.append(ps[7:-1])
         else:
-
-    return
-
+            prompt_list.append(ps)
+    return prompt_list

 def model_generation(
     prompt,
-
+    processor,
     tokenizer,
     model,
     temperature,
@@ -123,31 +113,15 @@ def model_generation(
     top_p,
     penalty_alpha,
 ):
-
-
-
+    input_args = processor(
+        [split_prompt_into_list(prompt)],
+        eval_mode=True,
         truncation=True,
         max_length=MAX_SEQ_LEN - 512, # TODO: replace the 512 value with `max_new_tokens`
         padding=True,
-        add_special_tokens=False,
     )
-
-
-    attention_mask = torch.tensor([[1] + tokens.attention_mask[0]])
-
-    image_attention_mask = [
-        incremental_to_binary_attention_mask(
-            image_attention_mask_for_packed_input_ids(input_ids[0].unsqueeze(0), tokenizer)[0], num_classes=len(images)
-        )
-    ]
-
-    image_transform = build_image_transform(eval=True)
-    pixel_values = [torch.stack([image_transform(img) for img in images])]
-
-    input_ids = input_ids.to(0)
-    attention_mask = attention_mask.to(0)
-    pixel_values = torch.stack(pixel_values).to(0)
-    image_attention_mask = torch.cat(image_attention_mask, 0).to(0)
+    for k, v in input_args.items():
+        input_args[k] = v.to(0)

     # Excluding some words from the generation
     bad_words_ids = None
@@ -179,13 +153,6 @@ def model_generation(
         )
         eos_token_ids += tokenized_eos_token

-    # Inputs
-    input_args = {
-        "input_ids": input_ids,
-        "attention_mask": attention_mask,
-        "pixel_values": pixel_values,
-        "image_attention_mask": image_attention_mask,
-    }
     # Common parameters to all decoding strategies
     # This documentation is useful to read: https://huggingface.co/docs/transformers/main/en/generation_strategies
     generation_args = {
@@ -239,7 +206,7 @@ def model_generation(
         tokenizer.batch_decode(generated_tokens, skip_special_tokens=hide_special_tokens)[0]
     )

-    actual_generated_tokens = generated_tokens[:, input_ids.shape[-1] :]
+    actual_generated_tokens = generated_tokens[:, input_args["input_ids"].shape[-1] :]
     first_end_token = len(actual_generated_tokens[0])
     actual_generated_tokens = actual_generated_tokens[:, :first_end_token]
     generated_text = tokenizer.batch_decode(actual_generated_tokens, skip_special_tokens=hide_special_tokens)[0]
@@ -285,7 +252,7 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
         show_label=False,
         container=False,
     )
-    tokenizer, model =
+    processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)

     imagebox = gr.Image(
         type="pil",
@@ -329,7 +296,7 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
         elem_id="chatbot",
         label="Idefics Chatbot",
         visible=True,
-        height=
+        height=750,
         value=[
             [
                 (
@@ -391,7 +358,7 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
         user_prompt,
         chat_history,
     ):
-        global model, tokenizer
+        global processor, model, tokenizer

         temperature = 1.0
         no_repeat_ngram_size = 0
@@ -412,15 +379,9 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
             history=chat_history,
         )

-        url_images = re.findall(r"<image(.*?)>", formated_prompt)
-        for idx, url_image in enumerate(url_images):
-            formated_prompt = formated_prompt.replace(url_image, "")
-            url_images[idx] = url_images[idx][1:]
-        images = fetch_images(url_images)
-
         generated_text = model_generation(
             prompt=formated_prompt,
-
+            processor=processor,
             tokenizer=tokenizer,
             model=model,
             temperature=temperature,
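The core of this commit is the switch from hand-rolled tokenization and image fetching to the model's processor: the prompt string is first split into text and image-URL segments by the new split_prompt_into_list helper, and the processor then builds the model inputs (input_ids, attention mask, pixel values, ...) itself. A minimal, self-contained check of the helper's behaviour; the prompt and URL below are made-up placeholders, not taken from the Space:

def split_prompt_into_list(prompt_str):
    """Convert a full string prompt to the list format expected by the processor."""
    prompt_splitted = prompt_str.split("<fake_token_around_image>")
    prompt_list = []
    for ps in prompt_splitted:
        if ps.startswith("<image:"):
            # "<image:" is 7 characters; drop it and the closing ">" to keep only the URL
            prompt_list.append(ps[7:-1])
        else:
            prompt_list.append(ps)
    return prompt_list

prompt = (
    "User:"
    "<fake_token_around_image><image:https://example.com/cat.png><fake_token_around_image>"
    "Describe this image.\nAssistant:"
)
print(split_prompt_into_list(prompt))
# ['User:', 'https://example.com/cat.png', 'Describe this image.\nAssistant:']

Per the diff, model_generation wraps that list into a batch of one and calls processor([split_prompt_into_list(prompt)], eval_mode=True, truncation=True, ...), moving every returned tensor to device 0 before generate; that is why the old input_ids/pixel_values plumbing and the fetch_images helper could be deleted.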