TTS-GPT-SoVITS

Sleeping

App Files Community

lijiacai committed on Jun 5

Commit

3a17e04

•

1 Parent(s): 8ffefc6

add stt

Browse files

Files changed (2) hide show

main.py +57 -32
model.py +1 -0

main.py CHANGED Viewed

@@ -6,12 +6,16 @@ import os
 from enum import Enum
 import uvicorn
 import time
 try:
-    from model import text_to_speech
 except:
     def text_to_speech(voice, text):
         return f"static/zh/{voice}.mp3"
 description = """
 ## [接口文档](/docs)
 ## [效果演示](/)
@@ -22,6 +26,8 @@ description = """
 - 跨语言支持： 支持与训练数据集不同语言的推理，目前支持英语、日语和中文。
 """
 app = FastAPI(title="text to speech", description=description)
@@ -63,44 +69,63 @@ async def tts(
     return Response(audio_content,  headers=headers)
 class Demo:
     title = "text to speech"
     description = description
-    @property
-    def app(self):
-        with gr.Blocks() as demo:
-            self.layout()
-            self.event()
-        return demo
-    def click_run_button(self, voice, text):
         wav_path = text_to_speech(voice=voice, text=text)
         return wav_path
-    def event(self):
-        self.button.click(self.click_run_button, inputs=[
-                          self.voice, self.text], outputs=[self.audio])
-    def layout(self):
-        with gr.Row():
-            gr.Markdown(value=self.description)
-        with gr.Row():
-            with gr.Column(scale=2):
-                with gr.Row():
-                    self.text = gr.Textbox(label="请输入需要转换的文本")
-                with gr.Row():
-                    self.voice = gr.Dropdown(
-                        ["新闻小说主播-女士", "温柔女士"],
-                        label="选择音色")
-            with gr.Column(scale=2):
-                self.audio = gr.Audio(label="转换后的音频", type="filepath", scale=3)
-        with gr.Row():
-            self.button = gr.Button()
-gr.mount_gradio_app(app, Demo().app, path="/")
 if __name__ == '__main__':
-    uvicorn.run(app="main:app", port=int(7860), host="0.0.0.0")

 from enum import Enum
 import uvicorn
 import time
+import tempfile
 try:
+    from model import text_to_speech, speech_to_text
 except:
     def text_to_speech(voice, text):
         return f"static/zh/{voice}.mp3"
+    def speech_to_text(voice: str):
+        return "文本测试",""
 description = """
 ## [接口文档](/docs)
 ## [效果演示](/)
 - 跨语言支持： 支持与训练数据集不同语言的推理，目前支持英语、日语和中文。
+- 支持语音转文本/文本转语音
 """
 app = FastAPI(title="text to speech", description=description)
     return Response(audio_content,  headers=headers)
+@app.post("/stt")
+async def tts(
+        voice: UploadFile = File(...)
+):
+    contents = await voice.read()
+    with tempfile.NamedTemporaryFile() as f:
+        f.write(contents)
+        f.flush()
+    text, _ = speech_to_text(f.name)
+    return {"text": text}
 class Demo:
     title = "text to speech"
     description = description
+    def __init__(self):
+        with gr.Blocks(theme=gr.themes.Soft()) as self.page:
+            with gr.Row():
+                gr.Markdown(value=self.description)
+            with gr.Row():
+                with gr.Column(scale=2):
+                    with gr.Row():
+                        text_tts = gr.Textbox(label="请输入需要转换的文本")
+                    with gr.Row():
+                        voice_tts = gr.Dropdown(
+                            ["新闻小说主播-女士", "温柔女士"],
+                            label="选择音色")
+                    with gr.Row():
+                        audio_tts = gr.Audio(
+                            label="转换后的音频", type="filepath", scale=3)
+                    with gr.Row():
+                        button_tts = gr.Button(value="文本转语音")
+                with gr.Column(scale=2):
+                    audio_stt = gr.Audio(
+                        label="上传语音", type="filepath", scale=3)
+                    with gr.Row():
+                        button_stt = gr.Button(value="文本转语音")
+                        text_stt = gr.Text(label="结果")
+            # 事件
+            button_tts.click(self.click_run_button_tts, inputs=[
+                voice_tts, text_tts], outputs=[audio_tts])
+            button_stt.click(self.click_run_button_stt, inputs=[
+                audio_stt], outputs=[text_stt])
+    def click_run_button_tts(self, voice, text):
         wav_path = text_to_speech(voice=voice, text=text)
         return wav_path
+    def click_run_button_stt(self, audio):
+        text,_ = speech_to_text(voice=audio)
+        return text
+gr.mount_gradio_app(app, Demo().page, path="/")
 if __name__ == '__main__':
+    uvicorn.run(app="main:app", port=int(
+        os.environ.get("PORT", 7860)), host="0.0.0.0")

model.py CHANGED Viewed

@@ -761,3 +761,4 @@ def text_to_speech(voice, text):
         user_voice=voice, user_text=text, user_lang=language)
     return wav_path

         user_voice=voice, user_text=text, user_lang=language)
     return wav_path
+speech_to_text = transcribe