almost 02 version
app.py
CHANGED
@@ -3,7 +3,6 @@ from huggingface_hub import InferenceClient
 
 
 client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
-#client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")
 
 
 def generate_text(messages):
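The commented-out Qwen client above and the "LLM is unstable" notice added later in this commit both point at a flaky inference backend. A minimal, hedged sketch of one mitigation; the timeout value and retry count are my assumptions, not part of this commit:

```python
from huggingface_hub import InferenceClient

# Sketch only: timeout and retry count are illustrative assumptions.
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3", timeout=30)

def generate_text_with_retry(messages, retries=2):
    for attempt in range(retries + 1):
        try:
            generated = ""
            for token in client.chat_completion(messages, max_tokens=100, stream=True):
                generated += token.choices[0].delta.content or ""  # delta may be empty
                yield generated
            return
        except Exception:
            if attempt == retries:
                raise  # out of retries: surface the error to the UI
```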
@@ -13,12 +12,11 @@ def generate_text(messages):
     for token in client.chat_completion(messages, max_tokens=100,stream=True):
         content = (token.choices[0].delta.content)
         generated += content
-        #print(content)
-        #print(''.join(list(content)))
         yield generated
-
-
-
+
+    last = generated[-1] if generated else "."
+    if last not in [",",".","!","?"]:
+        yield generated+"," #no stream version
 
 def call_generate_text(message, history):
     #if len(message) == 0:
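The new tail `yield generated+","` makes sense once you know the browser-side TTS cuts audio at punctuation: a reply that ends mid-sentence would otherwise never be flushed. A hypothetical illustration — the splitter below is mine, not the actual matcha_tts code:

```python
import re

# Hypothetical splitter: a TTS front end that segments speech at punctuation
# silently drops any trailing text that lacks a terminator.
def split_for_tts(text):
    return re.findall(r"[^,.!?]+[,.!?]", text)

print(split_for_tts("Hello there"))   # []               -- tail is lost
print(split_for_tts("Hello there,"))  # ['Hello there,'] -- trailing comma flushes it
```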
@@ -35,7 +33,7 @@ def call_generate_text(message, history):
     text_generator = generate_text(messages)
 
     for text_chunk in text_generator:
-        print(f"chunk={text_chunk}")
+        #print(f"chunk={text_chunk}")
         assistant_message["content"] = text_chunk
         updated_history = messages + [assistant_message]
         yield "", updated_history
@@ -47,19 +45,28 @@ def call_generate_text(message, history):
 head = '''
 <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
 <script type="module">
-import {
-
+import { matccha_tts_onnx_env, matcha_tts_raw_env } from "http://localhost:8000/js-esm/matcha_tts_onnx_en.js";
+matccha_tts_onnx_env.matcha_tts_model_path = "/file=models/ljspeech_sim.onnx"
+matcha_tts_raw_env.maxInputLength = 140 // if "Device removed reason: DXGI_ERROR_DEVICE_HUNG" happens, reduce this to half
 </script>
 '''
 
 with gr.Blocks(title="LLM with TTS",head=head) as demo:
-    gr.Markdown("
-
-
+    gr.Markdown("""
+    ## Warnings
+    - Don't listen at high volume or with headphones until you've confirmed your machine can play audio.
+    - The GPU sometimes crashes because of maxInputLength; if it crashes for you, let me know along with your gpu-info.
+    ## Notice
+    - The LLM is unstable: the inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.
+    - The TTS takes a long time to load: please be patient; the first response may be delayed by over 40 seconds.
+
+    """)
+
+    gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech** - LLM and TTS models will change without notice.")
 
     js = """
-    function(chatbot){
-    window.
+    async function(chatbot){
+        await window.matcha_tts_update_chatbot(chatbot)
         //auto scroll
         var chatElement = document.getElementById('gr-chatbot');
         chatElement.scrollTop = chatElement.scrollHeight;
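`maxInputLength = 140` caps how much text the WebGPU model synthesizes at once, and the DXGI_ERROR_DEVICE_HUNG comment suggests overlong inputs can hang the GPU. A hypothetical Python sketch of limit-aware chunking at punctuation marks; the real splitting happens in the browser module, whose code is not shown here:

```python
# Hypothetical: accumulate punctuation-terminated pieces up to the length cap,
# mirroring what the maxInputLength = 140 setting implies.
def chunk_for_tts(text, max_len=140):
    chunks, current = [], ""
    # Split after every pause character, keeping the punctuation attached.
    for piece in text.replace("!", "!|").replace("?", "?|").replace(".", ".|").replace(",", ",|").split("|"):
        if current and len(current) + len(piece) > max_len:
            chunks.append(current)
            current = ""
        current += piece
    if current:
        chunks.append(current)
    return chunks  # note: a single sentence longer than max_len still exceeds the cap
```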
@@ -100,7 +107,7 @@ with gr.Blocks(title="LLM with TTS",head=head) as demo:
 import os
 remote_dir ="/home/user/app/"
 local_dir = "C:\\Users\\owner\\Documents\\pythons\\huggingface\\mistral-7b-v0.3-matcha-tts-en" #sorry this is my develop env
 
 # set not dir but file
 #demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx")])
-demo.launch(allowed_paths=[os.path.join(local_dir,"models","ljspeech_sim.onnx")])
+demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx"),os.path.join(local_dir,"models","ljspeech_sim.onnx")])
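`allowed_paths` must whitelist the exact file served via the `/file=` URL in the head script, so this commit passes both the Space path and the Windows dev path. A hedged alternative sketch, assuming the `demo` Blocks from above: pick whichever directory exists at runtime instead of whitelisting both.

```python
import os

# Sketch, not part of this commit: choose the directory that exists on the
# current machine rather than listing both environments.
remote_dir = "/home/user/app/"
local_dir = "C:\\Users\\owner\\Documents\\pythons\\huggingface\\mistral-7b-v0.3-matcha-tts-en"
base_dir = remote_dir if os.path.isdir(remote_dir) else local_dir
model_file = os.path.join(base_dir, "models", "ljspeech_sim.onnx")
demo.launch(allowed_paths=[model_file])  # assumes `demo` from the Blocks above
```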
|