Rooni committed on
Commit
c92287b
1 Parent(s): e216cee

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import torch
from PIL import Image
from io import BytesIO
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, TextStreamer

# Hub repository that hosts both the weights and the custom model code.
MODEL_REPO = "OEvortex/HelpingAI-Vision"

# Download the model's custom code files into the current directory.
# This has to run BEFORE `processing_llava` / `modeling_llava` are imported
# below, because those modules are provided by the files fetched here.
for code_file in (
    "configuration_llava.py",
    "configuration_phi.py",
    "modeling_llava.py",
    "modeling_phi.py",
    "processing_llava.py",
):
    hf_hub_download(repo_id=MODEL_REPO, filename=code_file, local_dir="./", force_download=True)

# Deliberately placed after the downloads (see comment above).
from processing_llava import LlavaProcessor, OpenCLIPImageProcessor  # noqa: E402
from modeling_llava import LlavaForConditionalGeneration  # noqa: E402

# Create the model in half precision and move it to the GPU.
model = LlavaForConditionalGeneration.from_pretrained(MODEL_REPO, torch_dtype=torch.float16)
model = model.to("cuda")

# Create the tokenizer and image processor, then combine them into the
# single processor used to prepare model inputs.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
image_processor = OpenCLIPImageProcessor(model.config.preprocess_config)
processor = LlavaProcessor(image_processor, tokenizer)
26
# Text generation callback
def generate_text(image, initial_text):
    """Generate text for an image and a text prompt.

    Args:
        image: The input image handed to the processor. May be ``None``
            when the caller supplies no image; that case is rejected.
        initial_text: The prompt text handed to the processor.

    Returns:
        The decoded first generated sequence with special tokens skipped.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    # Fail early with a readable message instead of an obscure error from
    # deep inside the processor when no image was provided.
    if image is None:
        raise gr.Error("Пожалуйста, загрузите изображение.")

    # NOTE(review): the extent of this `with` block was reconstructed from a
    # listing that lost its indentation -- confirm against the real file.
    with torch.inference_mode():
        # Prepare the model inputs and move the token tensors to the
        # model's device.
        inputs = processor(initial_text, image, model, return_tensors='pt')
        inputs['input_ids'] = inputs['input_ids'].to(model.device)
        inputs['attention_mask'] = inputs['attention_mask'].to(model.device)

        # Streams the generated text to stdout while generation runs.
        streamer = TextStreamer(tokenizer)

        # Sample up to 200 new tokens.
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            top_p=0.9,
            temperature=1.2,
            eos_token_id=tokenizer.eos_token_id,
            streamer=streamer,
        )

    # Return the generated text without special tokens.
    return tokenizer.decode(output[0], skip_special_tokens=True)
41
+
42
# Build the Gradio interface: inputs in the first column, the generated
# text in the second, and a button that triggers generation.
# NOTE(review): component nesting was reconstructed from a listing that lost
# its indentation -- confirm the button's placement against the real file.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Загрузите изображение")
            text_input = gr.Textbox(label="Введите текст запроса")
        with gr.Column():
            output_text = gr.Textbox(label="Сгенерированный текст")

    generate_button = gr.Button("Генерировать текст")
    # Wire the button: image + prompt in, generated text out.
    generate_button.click(
        fn=generate_text,
        inputs=[image_input, text_input],
        outputs=output_text,
    )

# Start the interface.
demo.launch()