Spaces:

Rooni
/

GptVision

Runtime error

App Files Files Community

Rooni committed on Feb 14

Commit

c92287b

•

1 Parent(s): e216cee

Create app.py

Browse files

Files changed (1) hide show

app.py +55 -0

app.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import gradio as gr
+import torch
+from PIL import Image
+from io import BytesIO
+from huggingface_hub import hf_hub_download
+from processing_llava import LlavaProcessor, OpenCLIPImageProcessor
+from modeling_llava import LlavaForConditionalGeneration
+from transformers import AutoTokenizer, TextStreamer
+# Скачиваем необходимые файлы модели
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="configuration_llava.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="configuration_phi.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="modeling_llava.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="modeling_phi.py", local_dir="./", force_download=True)
+hf_hub_download(repo_id="OEvortex/HelpingAI-Vision", filename="processing_llava.py", local_dir="./", force_download=True)
+# Создаем модель
+model = LlavaForConditionalGeneration.from_pretrained("OEvortex/HelpingAI-Vision", torch_dtype=torch.float16)
+model = model.to("cuda")
+# Создаем процессоры
+tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI-Vision")
+image_processor = OpenCLIPImageProcessor(model.config.preprocess_config)
+processor = LlavaProcessor(image_processor, tokenizer)
+# Функция для генерации текста
+def generate_text(image, initial_text):
+    # Обрабатываем входные данные
+    with torch.inference_mode():
+        inputs = processor(initial_text, image, model, return_tensors='pt')
+        inputs['input_ids'] = inputs['input_ids'].to(model.device)
+        inputs['attention_mask'] = inputs['attention_mask'].to(model.device)
+        streamer = TextStreamer(tokenizer)
+        # Генерируем данные
+        output = model.generate(**inputs, max_new_tokens=200, do_sample=True, top_p=0.9, temperature=1.2, eos_token_id=tokenizer.eos_token_id, streamer=streamer)
+    # Возвращаем сгенерированный текст, убирая начальный и конечный токены
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+# Build the Gradio interface: one row with two columns, then a button below.
+with gr.Blocks() as demo:
+    with gr.Row():
+        # First column: the inputs (image upload and prompt text).
+        with gr.Column():
+            # type="pil" asks Gradio to pass the upload to the callback as a PIL image.
+            image_input = gr.Image(type="pil", label="Загрузите изображение")
+            text_input = gr.Textbox(label="Введите текст запроса")
+        # Second column: the generated text.
+        with gr.Column():
+            output_text = gr.Textbox(label="Сгенерированный текст")
+    generate_button = gr.Button("Генерировать текст")
+    # On click, call generate_text(image, text) and show its return value in output_text.
+    generate_button.click(generate_text, inputs=[image_input, text_input], outputs=output_text)
+# Start the interface
+demo.launch()