Spaces: Running on Zero

Update chatbot.py

chatbot.py  CHANGED  (+73 -47)
@@ -4,16 +4,18 @@ import requests
 import random
 from threading import Thread
 from typing import List, Dict, Union
-import subprocess
-subprocess.run(
-    "pip install flash-attn --no-build-isolation",
-    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
-    shell=True,
-)
+# import subprocess
+# subprocess.run(
+#     "pip install flash-attn --no-build-isolation",
+#     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+#     shell=True,
+# )
 import torch
 import gradio as gr
 from bs4 import BeautifulSoup
-from transformers import LlavaProcessor, LlavaForConditionalGeneration,
+from transformers import LlavaProcessor, LlavaForConditionalGeneration,
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, TextIteratorStreamer
+from qwen_vl_utils import process_vision_info
 from huggingface_hub import InferenceClient
 from PIL import Image
 import spaces
@@ -25,14 +27,10 @@ import json
 from gradio_client import Client, file
 from groq import Groq
 
-#
-
-
-
-processor = LlavaProcessor.from_pretrained(model_id)
-model = LlavaForConditionalGeneration.from_pretrained(model_id,torch_dtype=torch.float16, use_flash_attention_2=True)
-model.to("cuda")
-# Credit to merve for code of llava interleave qwen
+# Model and Processor Loading (Done once at startup)
+MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct"
+model = Qwen2VLForConditionalGeneration.from_pretrained(MODEL_ID, trust_remote_code=True, torch_dtype=torch.float16).to("cuda").eval()
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
 
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY", None)
 
@@ -172,39 +170,69 @@ def video_gen(prompt):
     client = Client("KingNish/Instant-Video")
     return client.predict(prompt, api_name="/instant_video")
 
-
+image_extensions = Image.registered_extensions()
+video_extensions = ("avi", "mp4", "mov", "mkv", "flv", "wmv", "mjpeg", "wav", "gif", "webm", "m4v", "3gp")
+
+def qwen_inference(user_prompt, chat_history):
+    images = []
+    text_input = user_prompt["text"]
+
+    # Handle multiple image uploads
     if user_prompt["files"]:
-
+        images.extend(user_prompt["files"])
     else:
         for hist in chat_history:
-            if type(hist[0])==tuple:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    image
-
-
-
-
-
-
-
-
+            if type(hist[0]) == tuple:
+                images.extend(hist[0])
+
+    # System Prompt (Similar to LLaVA)
+    SYSTEM_PROMPT = "You are OpenGPT 4o, an exceptionally capable and versatile AI assistant made by KingNish. Your task is to fulfill users query in best possible way. You are provided with image, videos and 3d structures as input with question your task is to give best possible detailed results to user according to their query. Reply the question asked by user properly and best possible way."
+
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+    for image in images:
+        if image.endswith(video_extensions):
+            messages.append({
+                "role": "user",
+                "content": [
+                    {"type": "video", "video": image},
+                ]
+            })
+
+        if image.endswith(tuple([i for i, f in image_extensions.items()])):
+            messages.append({
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": image},
+                ]
+            })
+
+    # Add user text input
+    messages.append({
+        "role": "user",
+        "content": [
+            {"type": "text", "text": text_input}
+        ]
+    })
+
+    text = processor.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    image_inputs, video_inputs = process_vision_info(messages)
+    inputs = processor(
+        text=[text],
+        images=image_inputs,
+        videos=video_inputs,
+        padding=True,
+        return_tensors="pt",
+    ).to("cuda")
+
+    streamer = TextIteratorStreamer(
+        processor, skip_prompt=True, **{"skip_special_tokens": True}
+    )
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
 
-    return
+    return generation_kwargs
 
 # Initialize inference clients for different models
 client_mistral = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
@@ -215,9 +243,7 @@ client_mistral_nemo = InferenceClient("mistralai/Mistral-Nemo-Instruct-2407")
 @spaces.GPU(duration=60, queue=False)
 def model_inference( user_prompt, chat_history):
     if user_prompt["files"]:
-
-        streamer = TextIteratorStreamer(processor, skip_prompt=True, **{"skip_special_tokens": True})
-        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+        generation_kwargs = qwen_inference(user_prompt, chat_history)
 
         thread = Thread(target=model.generate, kwargs=generation_kwargs)
         thread.start()
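The new qwen_inference helper only prepares generation_kwargs (inputs plus a TextIteratorStreamer); generation itself still runs in model_inference on a background thread, and the part of the file not shown in this diff would then drain the streamer to produce a streamed reply. A minimal sketch of that consumption pattern, assuming the model and qwen_inference defined above; stream_reply and the buffer/yield loop are illustrative, not code from this commit:

from threading import Thread

def stream_reply(user_prompt, chat_history):
    # Build model inputs and the streamer with the helper added in this commit.
    generation_kwargs = qwen_inference(user_prompt, chat_history)

    # Run generation in a background thread so the streamer can be consumed here.
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    # TextIteratorStreamer yields decoded text chunks as they are produced;
    # accumulate them and yield partial replies, e.g. for Gradio streaming.
    buffer = ""
    for new_text in generation_kwargs["streamer"]:
        buffer += new_text
        yield buffer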