Spaces: Running on A10G

Commit 5471e91 · tricktreat committed · Parent: f3e41d6

text to video

Changed files:
- app.py (+4 -2)
- awesome_chat.py (+9 -4)
- config.gradio.yaml (+1 -1)
- models_server.py (+24 -24)
app.py CHANGED

@@ -115,7 +115,8 @@ with gr.Blocks() as demo:
             openai_api_key = gr.Textbox(
                 show_label=False,
                 placeholder="Set your OpenAI API key here and press Enter",
-                lines=1
+                lines=1,
+                type="password"
             ).style(container=False)
         with gr.Column(scale=0.15, min_width=0):
             btn1 = gr.Button("Submit").style(full_height=True)
@@ -125,7 +126,8 @@ with gr.Blocks() as demo:
             hugging_face_token = gr.Textbox(
                 show_label=False,
                 placeholder="Set your Hugging Face Token here and press Enter",
-                lines=1
+                lines=1,
+                type="password"
             ).style(container=False)
         with gr.Column(scale=0.15, min_width=0):
             btn3 = gr.Button("Submit").style(full_height=True)
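For context: `type="password"` makes a Gradio Textbox render its value masked, so pasted credentials are not shown on screen. A minimal sketch of the changed component, assuming Gradio 3.x (the release line whose `.style()` API is used above):

```python
import gradio as gr

with gr.Blocks() as demo:
    # type="password" masks the credential as it is typed;
    # lines=1 keeps the box to a single row.
    openai_api_key = gr.Textbox(
        show_label=False,
        placeholder="Set your OpenAI API key here and press Enter",
        lines=1,
        type="password",
    )

if __name__ == "__main__":
    demo.launch()
```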
awesome_chat.py CHANGED

@@ -152,6 +152,8 @@ def send_request(data):
 
     response = requests.post(endpoint, json=data, headers=HEADER, proxies=PROXY)
     logger.debug(response.text.strip())
+    if "choices" not in response.json():
+        return response.json()
     if use_completion:
         return response.json()["choices"][0]["text"].strip()
     else:
@@ -576,14 +578,14 @@ def model_inference(model_id, data, hosted_on, task, huggingfacetoken=None):
     HUGGINGFACE_HEADERS = None
     if hosted_on == "unknown":
         r = status(model_id)
-        logger.debug("Local Server Status: " + str(r
-        if
+        logger.debug("Local Server Status: " + str(r))
+        if "loaded" in r and r["loaded"]:
             hosted_on = "local"
         else:
             huggingfaceStatusUrl = f"https://api-inference.huggingface.co/status/{model_id}"
             r = requests.get(huggingfaceStatusUrl, headers=HUGGINGFACE_HEADERS, proxies=PROXY)
             logger.debug("Huggingface Status: " + str(r.json()))
-        if
+            if "loaded" in r and r["loaded"]:
                 hosted_on = "huggingface"
     try:
         if hosted_on == "local":
@@ -603,7 +605,7 @@ def get_model_status(model_id, url, headers, queue = None):
         r = requests.get(url, headers=headers, proxies=PROXY)
     else:
         r = status(model_id)
-    if
+    if "loaded" in r and r["loaded"]:
         if queue:
             queue.put((model_id, True, endpoint_type))
         return True
@@ -836,6 +838,9 @@ def chat_huggingface(messages, openaikey = None, huggingfacetoken = None, return
     task_str = parse_task(context, input, openaikey).strip()
     logger.info(task_str)
 
+    if "error" in task_str:
+        return {"message": "You exceeded your current quota, please check your plan and billing details."}
+
     if task_str == "[]": # using LLM response for empty task
         record_case(success=False, **{"input": input, "task": [], "reason": "task parsing fail: empty", "op": "chitchat"})
         response = chitchat(messages, openaikey)
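The thread running through these hunks is defensive handling of upstream failures: a completion reply without a `choices` field (typically an OpenAI error payload) is returned to the caller instead of raising a `KeyError`, the status checks guard the `loaded` key before reading it, and a quota error from task parsing short-circuits with a friendly message. A minimal sketch of the first pattern; `extract_completion` and the sample payloads are illustrative, not from the repo:

```python
def extract_completion(payload: dict):
    # OpenAI error payloads carry an "error" key instead of "choices";
    # surface them to the caller rather than crashing with a KeyError.
    if "choices" not in payload:
        return payload
    return payload["choices"][0]["text"].strip()

ok = {"choices": [{"text": " video generated "}]}
err = {"error": {"message": "You exceeded your current quota", "type": "insufficient_quota"}}

print(extract_completion(ok))   # "video generated"
print(extract_completion(err))  # the error payload, unchanged
```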
config.gradio.yaml CHANGED

@@ -8,7 +8,7 @@ log_file: logs/debug.log
 model: text-davinci-003 # text-davinci-003
 use_completion: true
 inference_mode: hybrid # local, huggingface or hybrid
-local_deployment:
+local_deployment: full # minimal, standard or full
 num_candidate_models: 5
 max_description_length: 100
 proxy:
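`local_deployment` selects how much runs locally; per the inline comment, `minimal`, `standard` and `full` are progressively larger sets of local pipelines. A sketch of consuming the setting, assuming the file is parsed with PyYAML (models_server.py's `if local_deployment in ["full"]:` gate appears in the diff below):

```python
import yaml

# Hypothetical illustration of reading the new setting.
with open("config.gradio.yaml") as f:
    config = yaml.safe_load(f)

local_deployment = config["local_deployment"]  # "minimal", "standard" or "full"
if local_deployment in ["full"]:
    # Only full deployments load the heavyweight pipes, e.g. the
    # damo-vilab/text-to-video-ms-1.7b pipeline added by this commit.
    print("loading full set of local pipelines")
```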
models_server.py CHANGED

@@ -78,9 +78,9 @@ def load_pipes(local_deployment):
     if local_deployment in ["full"]:
         other_pipes = {
             "nlpconnect/vit-gpt2-image-captioning":{
-                "model": VisionEncoderDecoderModel.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
-                "feature_extractor": ViTImageProcessor.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
-                "tokenizer": AutoTokenizer.from_pretrained(f"nlpconnect/vit-gpt2-image-captioning"),
+                "model": VisionEncoderDecoderModel.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
+                "feature_extractor": ViTImageProcessor.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
+                "tokenizer": AutoTokenizer.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
                 "device": "cuda:0"
             },
             # "Salesforce/blip-image-captioning-large": {
@@ -89,7 +89,7 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "damo-vilab/text-to-video-ms-1.7b": {
-                "model": DiffusionPipeline.from_pretrained(f"damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
+                "model": DiffusionPipeline.from_pretrained(f"{local_models}damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16"),
                 "device": "cuda:0"
             },
             # "facebook/maskformer-swin-large-ade": {
@@ -112,11 +112,11 @@ def load_pipes(local_deployment):
                 "device": "cuda:0"
             },
             "espnet/kan-bayashi_ljspeech_vits": {
-                "model": Text2Speech.from_pretrained(
+                "model": Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_vits"),
                 "device": "cuda:0"
             },
             "lambdalabs/sd-image-variations-diffusers": {
-                "model": DiffusionPipeline.from_pretrained(f"lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
+                "model": DiffusionPipeline.from_pretrained(f"{local_models}lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
                 "device": "cuda:0"
             },
             # "CompVis/stable-diffusion-v1-4": {
@@ -128,7 +128,7 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "runwayml/stable-diffusion-v1-5": {
-                "model": DiffusionPipeline.from_pretrained(f"runwayml/stable-diffusion-v1-5"),
+                "model": DiffusionPipeline.from_pretrained(f"{local_models}runwayml/stable-diffusion-v1-5"),
                 "device": "cuda:0"
             },
             # "microsoft/speecht5_tts":{
@@ -143,10 +143,10 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "microsoft/speecht5_vc":{
-                "processor": SpeechT5Processor.from_pretrained(f"microsoft/speecht5_vc"),
-                "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"microsoft/speecht5_vc"),
-                "vocoder": SpeechT5HifiGan.from_pretrained(f"microsoft/speecht5_hifigan"),
-                "embeddings_dataset": load_dataset(f"Matthijs/cmu-arctic-xvectors", split="validation"),
+                "processor": SpeechT5Processor.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
+                "model": SpeechT5ForSpeechToSpeech.from_pretrained(f"{local_models}microsoft/speecht5_vc"),
+                "vocoder": SpeechT5HifiGan.from_pretrained(f"{local_models}microsoft/speecht5_hifigan"),
+                "embeddings_dataset": load_dataset(f"{local_models}Matthijs/cmu-arctic-xvectors", split="validation"),
                 "device": "cuda:0"
             },
             # "julien-c/wine-quality": {
@@ -158,13 +158,13 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "facebook/maskformer-swin-base-coco": {
-                "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"facebook/maskformer-swin-base-coco"),
-                "model": MaskFormerForInstanceSegmentation.from_pretrained(f"facebook/maskformer-swin-base-coco"),
+                "feature_extractor": MaskFormerFeatureExtractor.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
+                "model": MaskFormerForInstanceSegmentation.from_pretrained(f"{local_models}facebook/maskformer-swin-base-coco"),
                 "device": "cuda:0"
             },
             "Intel/dpt-hybrid-midas": {
-                "model": DPTForDepthEstimation.from_pretrained(f"Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
-                "feature_extractor": DPTFeatureExtractor.from_pretrained(f"Intel/dpt-hybrid-midas"),
+                "model": DPTForDepthEstimation.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas", low_cpu_mem_usage=True),
+                "feature_extractor": DPTFeatureExtractor.from_pretrained(f"{local_models}Intel/dpt-hybrid-midas"),
                 "device": "cuda:0"
             }
         }
@@ -176,15 +176,15 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "openai/whisper-base": {
-                "model": pipeline(task="automatic-speech-recognition", model=f"openai/whisper-base"),
+                "model": pipeline(task="automatic-speech-recognition", model=f"{local_models}openai/whisper-base"),
                 "device": "cuda:0"
             },
             "microsoft/speecht5_asr": {
-                "model": pipeline(task="automatic-speech-recognition", model=f"microsoft/speecht5_asr"),
+                "model": pipeline(task="automatic-speech-recognition", model=f"{local_models}microsoft/speecht5_asr"),
                 "device": "cuda:0"
             },
             "Intel/dpt-large": {
-                "model": pipeline(task="depth-estimation", model=f"Intel/dpt-large"),
+                "model": pipeline(task="depth-estimation", model=f"{local_models}Intel/dpt-large"),
                 "device": "cuda:0"
             },
             # "microsoft/beit-base-patch16-224-pt22k-ft22k": {
@@ -192,11 +192,11 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "facebook/detr-resnet-50-panoptic": {
-                "model": pipeline(task="image-segmentation", model=f"facebook/detr-resnet-50-panoptic"),
+                "model": pipeline(task="image-segmentation", model=f"{local_models}facebook/detr-resnet-50-panoptic"),
                 "device": "cuda:0"
             },
             "facebook/detr-resnet-101": {
-                "model": pipeline(task="object-detection", model=f"facebook/detr-resnet-101"),
+                "model": pipeline(task="object-detection", model=f"{local_models}facebook/detr-resnet-101"),
                 "device": "cuda:0"
             },
             # "openai/clip-vit-large-patch14": {
@@ -204,7 +204,7 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "google/owlvit-base-patch32": {
-                "model": pipeline(task="zero-shot-object-detection", model=f"google/owlvit-base-patch32"),
+                "model": pipeline(task="zero-shot-object-detection", model=f"{local_models}google/owlvit-base-patch32"),
                 "device": "cuda:0"
             },
             # "microsoft/DialoGPT-medium": {
@@ -248,15 +248,15 @@ def load_pipes(local_deployment):
             #     "device": "cuda:0"
             # },
             "impira/layoutlm-document-qa": {
-                "model": pipeline(task="document-question-answering", model=f"impira/layoutlm-document-qa"),
+                "model": pipeline(task="document-question-answering", model=f"{local_models}impira/layoutlm-document-qa"),
                 "device": "cuda:0"
             },
             "ydshieh/vit-gpt2-coco-en": {
-                "model": pipeline(task="image-to-text", model=f"ydshieh/vit-gpt2-coco-en"),
+                "model": pipeline(task="image-to-text", model=f"{local_models}ydshieh/vit-gpt2-coco-en"),
                 "device": "cuda:0"
             },
             "dandelin/vilt-b32-finetuned-vqa": {
-                "model": pipeline(task="visual-question-answering", model=f"dandelin/vilt-b32-finetuned-vqa"),
+                "model": pipeline(task="visual-question-answering", model=f"{local_models}dandelin/vilt-b32-finetuned-vqa"),
                 "device": "cuda:0"
             }
         }
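Every change in this file is the same mechanical rewrite: the hard-coded repo IDs passed to `from_pretrained` and `pipeline` gain a `local_models` prefix (the one exception is the ESPnet `Text2Speech` pipe, which keeps the plain repo ID). A sketch of the idea; the value of `local_models` here is assumed, and the real variable is defined elsewhere in models_server.py:

```python
# Assumed values: "" pulls from the Hugging Face Hub, while a directory
# prefix such as "models/" resolves to pre-downloaded checkpoints on disk.
local_models = "models/"
repo_id = "runwayml/stable-diffusion-v1-5"

# from_pretrained accepts either a Hub repo ID or a local path, so the
# same f-string covers both deployment styles.
path_or_id = f"{local_models}{repo_id}"
print(path_or_id)  # models/runwayml/stable-diffusion-v1-5
```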