Akjava committed on
Commit
8d11eb1
1 Parent(s): b9aa170

almost 02 version

Browse files
Files changed (1) hide show
  1. app.py +23 -16
app.py CHANGED
@@ -3,7 +3,6 @@ from huggingface_hub import InferenceClient
3
 
4
 
5
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
6
- #client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")
7
 
8
 
9
  def generate_text(messages):
@@ -13,12 +12,11 @@ def generate_text(messages):
13
  for token in client.chat_completion(messages, max_tokens=100,stream=True):
14
  content = (token.choices[0].delta.content)
15
  generated += content
16
- #print(content)
17
- #print(''.join(list(content)))
18
  yield generated
19
- #print(token.choices[0].delta)
20
-
21
- #return generated+"\n" #no stram version
 
22
 
23
  def call_generate_text(message, history):
24
  #if len(message) == 0:
@@ -35,7 +33,7 @@ def call_generate_text(message, history):
35
  text_generator = generate_text(messages)
36
 
37
  for text_chunk in text_generator:
38
- print(f"chunk={text_chunk}")
39
  assistant_message["content"] = text_chunk
40
  updated_history = messages + [assistant_message]
41
  yield "", updated_history
@@ -47,19 +45,28 @@ def call_generate_text(message, history):
47
  head = '''
48
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
49
  <script type="module">
50
- import { env } from "http://localhost:8000/js-esm/matcha_tts_onnx_en.js";
51
- env.matcha_tts_model_path = "/file=models/ljspeech_sim.onnx"
 
52
  </script>
53
  '''
54
 
55
  with gr.Blocks(title="LLM with TTS",head=head) as demo:
56
- gr.Markdown("## A LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.")
57
- gr.Markdown("### TTS talke a long loading time:Please be patient, the first response may have a delay of up to over 40 seconds while loading.")
58
- gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech**.LLM and TTS models will change without notice.")
 
 
 
 
 
 
 
 
59
 
60
  js = """
61
- function(chatbot){
62
- window.update_chatbot(chatbot)
63
  //auto scroll
64
  var chatElement = document.getElementById('gr-chatbot');
65
  chatElement.scrollTop = chatElement.scrollHeight;
@@ -100,7 +107,7 @@ with gr.Blocks(title="LLM with TTS",head=head) as demo:
100
  import os
101
  remote_dir ="/home/user/app/"
102
  local_dir = "C:\\Users\\owner\\Documents\\pythons\\huggingface\\mistral-7b-v0.3-matcha-tts-en" #sorry this is my develop env
103
- #print(os.path.join(dir,"js-esm","matcha_tts_onnx_en.js"))
104
  # set not dir but file
105
  #demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx")])
106
- demo.launch(allowed_paths=[os.path.join(local_dir,"models","ljspeech_sim.onnx")])
 
3
 
4
 
5
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.3")
 
6
 
7
 
8
  def generate_text(messages):
 
12
  for token in client.chat_completion(messages, max_tokens=100,stream=True):
13
  content = (token.choices[0].delta.content)
14
  generated += content
 
 
15
  yield generated
16
+
17
+ last = generated[-1]
18
+ if last not in [",",".","!","?"]:
19
+ yield generated+"," #no stram version
20
 
21
  def call_generate_text(message, history):
22
  #if len(message) == 0:
 
33
  text_generator = generate_text(messages)
34
 
35
  for text_chunk in text_generator:
36
+ #print(f"chunk={text_chunk}")
37
  assistant_message["content"] = text_chunk
38
  updated_history = messages + [assistant_message]
39
  yield "", updated_history
 
45
  head = '''
46
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.webgpu.min.js" ></script>
47
  <script type="module">
48
+ import { matccha_tts_onnx_env ,matcha_tts_raw_env} from "http://localhost:8000/js-esm/matcha_tts_onnx_en.js";
49
+ matccha_tts_onnx_env.matcha_tts_model_path = "/file=models/ljspeech_sim.onnx"
50
+ matcha_tts_raw_env.maxInputLength = 140 //if Device removed reason: DXGI_ERROR_DEVICE_HUNG happend reduce to HALF
51
  </script>
52
  '''
53
 
54
  with gr.Blocks(title="LLM with TTS",head=head) as demo:
55
+ gr.Markdown("""
56
+ ## Warnings
57
+ - Don't listen large volume or with headone until confirm your machine can play aduio
58
+ - some time gpu crash because of maxInputLength if you crash let me know with your gpu-info
59
+ ## Notice
60
+ - LLM is unstable:The inference client used in this demo exhibits inconsistent performance. While it can provide responses in milliseconds, it sometimes becomes unresponsive and times out.
61
+ - TTS talke a long loading time:Please be patient, the first response may have a delay of up to over 40 seconds while loading.
62
+
63
+ """)
64
+
65
+ gr.Markdown("**Mistral-7B-Instruct-v0.3/LJSpeech** - LLM and TTS models will change without notice.")
66
 
67
  js = """
68
+ async function(chatbot){
69
+ await window.matcha_tts_update_chatbot(chatbot)
70
  //auto scroll
71
  var chatElement = document.getElementById('gr-chatbot');
72
  chatElement.scrollTop = chatElement.scrollHeight;
 
107
  import os
108
  remote_dir ="/home/user/app/"
109
  local_dir = "C:\\Users\\owner\\Documents\\pythons\\huggingface\\mistral-7b-v0.3-matcha-tts-en" #sorry this is my develop env
110
+
111
  # set not dir but file
112
  #demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx")])
113
+ demo.launch(allowed_paths=[os.path.join(remote_dir,"models","ljspeech_sim.onnx"),os.path.join(local_dir,"models","ljspeech_sim.onnx")])