Spaces:
Running
on
Zero
Running
on
Zero
kwabs22
committed on
Commit
•
f672e00
1
Parent(s):
4dd165c
Migrate to ZeroGPU attempt
Browse files- app.py +104 -5
- requirements.txt +2 -0
app.py
CHANGED
@@ -5,6 +5,76 @@ import re
|
|
5 |
import os
|
6 |
import shutil
|
7 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# Default configuration template
|
10 |
default_config = {
|
@@ -648,6 +718,25 @@ with gr.Blocks() as demo:
|
|
648 |
with gr.Accordion("Qwen 0.5B as Space Guide Tests", open=False):
|
649 |
gr.HTML("Placeholder for FAQ type - front end as prompt engineering for the first message to force direction of conversion")
|
650 |
gr.HTML("Placeholder for weak RAG Type - Explanations through an opensource embeddings engine")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
651 |
with gr.Accordion("Temporary Asset Management Assist - click to open", open=False):
|
652 |
gr.HTML("Make Files and Text ideas for the field and paste <br>When Space is restarted it will clear - zip export and import will be added later")
|
653 |
with gr.Accordion("Upload Files for config"):
|
@@ -1468,12 +1557,20 @@ Would you like me to elaborate on any of these ideas or show how to implement th
|
|
1468 |
|
1469 |
with gr.Tab("Images"):
|
1470 |
with gr.Accordion("Image Gen or Animation HF Spaces/Sites (Click Here to Open) - Have to download and upload at the the top", open=False):
|
1471 |
-
with gr.Tabs("General"):
|
1472 |
-
|
1473 |
-
|
1474 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
1475 |
imagegenspace = gr.HTML("Chat Space Chosen will load here")
|
1476 |
imagegenspacebtn.click(display_website, inputs=linktoimagegen, outputs=imagegenspace)
|
|
|
|
|
1477 |
gr.HTML("Concept Art, UI elements, Static/3D Characters, Environments and Objects")
|
1478 |
gr.HTML("Image Caption = https://huggingface.co/spaces/microsoft/Promptist, https://huggingface.co/spaces/gokaygokay/SD3-Long-Captioner, https://huggingface.co/spaces/gokaygokay/Florence-2, ")
|
1479 |
gr.HTML("Images Generation Portraits = https://huggingface.co/spaces/okaris/omni-zero")
|
@@ -1490,6 +1587,8 @@ Would you like me to elaborate on any of these ideas or show how to implement th
|
|
1490 |
gr.HTML("Placeholder for models small enough to run on cpu here in this space that can assist")
|
1491 |
|
1492 |
with gr.Tab("Video"):
|
|
|
|
|
1493 |
gr.HTML("Cutscenes, Tutorials, Trailers")
|
1494 |
gr.HTML("Portrait Video eg. Solo Taking NPC - https://huggingface.co/spaces/fffiloni/tts-hallo-talking-portrait (Image + Audio and combination) https://huggingface.co/spaces/KwaiVGI/LivePortrait (Non verbal communication eg. in a library, when running from a pursuer)")
|
1495 |
gr.HTML("Placeholder for huggingface spaces that can assist - https://huggingface.co/spaces/KingNish/Instant-Video, https://huggingface.co/spaces/multimodalart/stable-video-diffusion, https://huggingface.co/spaces/multimodalart/stable-video-diffusion")
|
@@ -1671,4 +1770,4 @@ Would you like me to elaborate on any of these ideas or show how to implement th
|
|
1671 |
</div>
|
1672 |
""")
|
1673 |
|
1674 |
-
demo.launch()
|
|
|
5 |
import os
|
6 |
import shutil
|
7 |
from PIL import Image
|
8 |
+
import spaces
|
9 |
+
import torch
|
10 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
11 |
+
from threading import Thread
|
12 |
+
import time
|
13 |
+
|
14 |
+
# Single source of truth for the checkpoint used by both the model and the
# tokenizer below, so the two can never drift apart.
_LLMGUIDE_MODEL_ID = "Qwen/Qwen2-0.5B-Instruct"

# ZeroGPU probe tensor: created with .cuda() at import time and later used by
# llmguide_generate_response as the target device for the tokenized inputs.
zero = torch.Tensor([0]).cuda()
# NOTE(review): per the original author's note this reports 'cpu' outside a
# @spaces.GPU-decorated function - confirm against the ZeroGPU docs.
print(zero.device)

# Tokenizer and model are loaded once at import time and shared by every
# request handled by this Space.
llmguide_tokenizer = AutoTokenizer.from_pretrained(_LLMGUIDE_MODEL_ID)
llmguide_model = AutoModelForCausalLM.from_pretrained(
    _LLMGUIDE_MODEL_ID,
    torch_dtype="auto",
    device_map="auto",
)
|
25 |
+
|
26 |
+
@spaces.GPU
def llmguide_generate_response(prompt, stream=False):
    """Generate a reply to ``prompt`` with the module-level Qwen guide model.

    The function is a generator in both modes so it can be attached directly
    to a Gradio event handler with two outputs.

    Args:
        prompt: The user's message. It is sent after a fixed system message
            using the tokenizer's chat template.
        stream: When true, generation runs in a background thread and the
            partial response is yielded after every chunk produced by the
            streamer. When false, a single final result is yielded.

    Yields:
        tuple[str, str]: ``(response_text, rate)`` where ``rate`` is the
        throughput formatted with two decimals. In streaming mode the rate is
        counted in streamer chunks per second; in non-streaming mode it is
        newly generated tokens per second.

    Raises:
        Exception: In streaming mode, any error raised by ``generate`` in the
            background thread is re-raised here once the stream has ended.
    """
    # Debug aid kept from the original: shows the device of the probe tensor
    # as seen from inside the @spaces.GPU-decorated call.
    print(zero.device)

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = llmguide_tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = llmguide_tokenizer([text], return_tensors="pt").to(zero.device)

    # perf_counter is monotonic, so the measured interval cannot go negative
    # if the wall clock is adjusted while generating.
    start_time = time.perf_counter()

    def _rate(count):
        # Guard against a zero-length interval on very coarse timers.
        elapsed = max(time.perf_counter() - start_time, 1e-9)
        return f"{count / elapsed:.2f}"

    if stream:
        # skip_prompt=True keeps the rendered chat template (system + user
        # text) out of the streamed output; without it the prompt is echoed
        # back as the first chunk of the "response".
        streamer = TextIteratorStreamer(
            llmguide_tokenizer,
            skip_prompt=True,
            skip_special_tokens=True,
        )
        generation_kwargs = dict(
            model_inputs,
            streamer=streamer,
            max_new_tokens=512,
            temperature=0.7,
        )
        errors = []

        def _generate_in_background():
            # If generate() fails, the streamer never receives its end signal
            # and the consumer loop below would block forever. Record the
            # error and close the stream so the caller can surface it.
            try:
                llmguide_model.generate(**generation_kwargs)
            except Exception as exc:
                errors.append(exc)
                streamer.end()

        thread = Thread(target=_generate_in_background)
        thread.start()

        generated_text = ""
        total_chunks = 0
        for new_text in streamer:
            generated_text += new_text
            total_chunks += 1
            yield generated_text, _rate(total_chunks)

        # The stream is exhausted, so the worker is done (or about to be).
        thread.join()
        if errors:
            raise errors[0]
    else:
        prompt_length = model_inputs.input_ids.shape[1]
        # Pass the full encoding (input_ids + attention_mask), matching what
        # the streaming path gives to generate().
        output_ids = llmguide_model.generate(
            **model_inputs,
            max_new_tokens=512,
        )
        # generate() returns prompt + continuation; keep the continuation only.
        new_token_ids = output_ids[0][prompt_length:]
        response = llmguide_tokenizer.decode(new_token_ids, skip_special_tokens=True)
        yield response, _rate(len(new_token_ids))
|
75 |
+
|
76 |
+
|
77 |
+
#--------------------------------------------------------------------------------------------------------------------------------
|
78 |
|
79 |
# Default configuration template
|
80 |
default_config = {
|
|
|
718 |
with gr.Accordion("Qwen 0.5B as Space Guide Tests", open=False):
|
719 |
gr.HTML("Placeholder for FAQ type - front end as prompt engineering for the first message to force direction of conversion")
|
720 |
gr.HTML("Placeholder for weak RAG Type - Explanations through an opensource embeddings engine")
|
721 |
+
gr.Markdown("# Qwen-0.5B-Instruct Language Model")
|
722 |
+
gr.Markdown("This demo uses the Qwen-0.5B-Instruct model to generate responses based on your input.")
|
723 |
+
|
724 |
+
with gr.Row():
|
725 |
+
with gr.Column():
|
726 |
+
llmguide_prompt = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
|
727 |
+
llmguide_stream_checkbox = gr.Checkbox(label="Enable streaming")
|
728 |
+
llmguide_submit_button = gr.Button("Generate")
|
729 |
+
|
730 |
+
with gr.Column():
|
731 |
+
llmguide_output = gr.Textbox(lines=10, label="Generated Response")
|
732 |
+
llmguide_tokens_per_second = gr.Textbox(label="Tokens per Second")
|
733 |
+
|
734 |
+
llmguide_submit_button.click(
|
735 |
+
llmguide_generate_response,
|
736 |
+
inputs=[llmguide_prompt, llmguide_stream_checkbox],
|
737 |
+
outputs=[llmguide_output, llmguide_tokens_per_second],
|
738 |
+
)
|
739 |
+
|
740 |
with gr.Accordion("Temporary Asset Management Assist - click to open", open=False):
|
741 |
gr.HTML("Make Files and Text ideas for the field and paste <br>When Space is restarted it will clear - zip export and import will be added later")
|
742 |
with gr.Accordion("Upload Files for config"):
|
|
|
1557 |
|
1558 |
with gr.Tab("Images"):
|
1559 |
with gr.Accordion("Image Gen or Animation HF Spaces/Sites (Click Here to Open) - Have to download and upload at the the top", open=False):
|
1560 |
+
# with gr.Tabs("General"):
|
1561 |
+
with gr.Row():
|
1562 |
+
linktoimagegen = gr.Dropdown(choices=["General", "https://prodia-sdxl-stable-diffusion-xl.hf.space", "https://prodia-fast-stable-diffusion.hf.space", "https://bytedance-hyper-sdxl-1step-t2i.hf.space", "https://multimodalart-cosxl.hf.space", "https://cagliostrolab-animagine-xl-3-1.hf.space",
|
1563 |
+
"Speed", "https://radames-real-time-text-to-image-sdxl-lightning.hf.space", "https://ap123-sdxl-lightning.hf.space",
|
1564 |
+
"LORA Support", "https://artificialguybr-artificialguybr-demo-lora.hf.space", "https://ehristoforu-dalle-3-xl-lora-v2.hf.space",
|
1565 |
+
"Image to Image", "https://lllyasviel-ic-light.hf.space", "https://gparmar-img2img-turbo-sketch.hf.space",
|
1566 |
+
"Control of Pose", "https://instantx-instantid.hf.space",
|
1567 |
+
"Control of Shapes", "https://linoyts-scribble-sdxl-flash.hf.space", "https://modelscope-transferanything.hf.space", "https://visionmaze-magic-me.hf.space", "https://wangfuyun-animatelcm.hf.space", "https://guoyww-animatediff.hf.space", "https://segmind-segmind-stable-diffusion.hf.space", "https://simianluo-latent-consistency-model.hf.space", "https://artificialguybr-studio-ghibli-lora-sdxl.hf.space", "https://artificialguybr-pixel-art-generator.hf.space", "https://fffiloni-sdxl-control-loras.hf.space"], label="Choose/Cancel type any .hf.space link here (can also type a link)'", allow_custom_value=True)
|
1568 |
+
imagegenspacebtn = gr.Button("Use the chosen URL to load interface with a image generation model")
|
1569 |
+
|
1570 |
imagegenspace = gr.HTML("Chat Space Chosen will load here")
|
1571 |
imagegenspacebtn.click(display_website, inputs=linktoimagegen, outputs=imagegenspace)
|
1572 |
+
|
1573 |
+
linkstobecollectednoembed = "https://artgan-diffusion-api.hf.space", "https://multimodalart-stable-cascade.hf.space", "https://google-sdxl.hf.space",
|
1574 |
gr.HTML("Concept Art, UI elements, Static/3D Characters, Environments and Objects")
|
1575 |
gr.HTML("Image Caption = https://huggingface.co/spaces/microsoft/Promptist, https://huggingface.co/spaces/gokaygokay/SD3-Long-Captioner, https://huggingface.co/spaces/gokaygokay/Florence-2, ")
|
1576 |
gr.HTML("Images Generation Portraits = https://huggingface.co/spaces/okaris/omni-zero")
|
|
|
1587 |
gr.HTML("Placeholder for models small enough to run on cpu here in this space that can assist")
|
1588 |
|
1589 |
with gr.Tab("Video"):
|
1590 |
+
|
1591 |
+
linkstobecollectednoembed2 = "https://kadirnar-open-sora.hf.space", "https://wangfuyun-animatelcm-svd.hf.space", "https://bytedance-animatediff-lightning.hf.space",
|
1592 |
gr.HTML("Cutscenes, Tutorials, Trailers")
|
1593 |
gr.HTML("Portrait Video eg. Solo Taking NPC - https://huggingface.co/spaces/fffiloni/tts-hallo-talking-portrait (Image + Audio and combination) https://huggingface.co/spaces/KwaiVGI/LivePortrait (Non verbal communication eg. in a library, when running from a pursuer)")
|
1594 |
gr.HTML("Placeholder for huggingface spaces that can assist - https://huggingface.co/spaces/KingNish/Instant-Video, https://huggingface.co/spaces/multimodalart/stable-video-diffusion, https://huggingface.co/spaces/multimodalart/stable-video-diffusion")
|
|
|
1770 |
</div>
|
1771 |
""")
|
1772 |
|
1773 |
+
demo.queue().launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
transformers
|
2 |
+
accelerate
|