lijiacai committed on
Commit
3a17e04
1 Parent(s): 8ffefc6
Files changed (2) hide show
  1. main.py +57 -32
  2. model.py +1 -0
main.py CHANGED
@@ -6,12 +6,16 @@ import os
6
  from enum import Enum
7
  import uvicorn
8
  import time
 
9
  try:
10
- from model import text_to_speech
11
  except:
12
  def text_to_speech(voice, text):
13
  return f"static/zh/{voice}.mp3"
14
 
 
 
 
15
  description = """
16
  ## [接口文档](/docs)
17
  ## [效果演示](/)
@@ -22,6 +26,8 @@ description = """
22
 
23
  - 跨语言支持: 支持与训练数据集不同语言的推理,目前支持英语、日语和中文。
24
 
 
 
25
  """
26
  app = FastAPI(title="text to speech", description=description)
27
 
@@ -63,44 +69,63 @@ async def tts(
63
  return Response(audio_content, headers=headers)
64
 
65
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  class Demo:
67
  title = "text to speech"
68
  description = description
69
 
70
- @property
71
- def app(self):
72
- with gr.Blocks() as demo:
73
- self.layout()
74
- self.event()
75
- return demo
76
-
77
- def click_run_button(self, voice, text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  wav_path = text_to_speech(voice=voice, text=text)
79
  return wav_path
80
 
81
- def event(self):
82
- self.button.click(self.click_run_button, inputs=[
83
- self.voice, self.text], outputs=[self.audio])
84
-
85
- def layout(self):
86
- with gr.Row():
87
- gr.Markdown(value=self.description)
88
- with gr.Row():
89
- with gr.Column(scale=2):
90
- with gr.Row():
91
- self.text = gr.Textbox(label="请输入需要转换的文本")
92
- with gr.Row():
93
- self.voice = gr.Dropdown(
94
- ["新闻小说主播-女士", "温柔女士"],
95
- label="选择音色")
96
- with gr.Column(scale=2):
97
- self.audio = gr.Audio(label="转换后的音频", type="filepath", scale=3)
98
- with gr.Row():
99
- self.button = gr.Button()
100
-
101
-
102
- gr.mount_gradio_app(app, Demo().app, path="/")
103
 
104
  if __name__ == '__main__':
105
 
106
- uvicorn.run(app="main:app", port=int(7860), host="0.0.0.0")
 
 
6
  from enum import Enum
7
  import uvicorn
8
  import time
9
+ import tempfile
# Prefer the real model implementation; fall back to lightweight stubs when it
# cannot be loaded, so the API and demo page still start.
try:
    from model import text_to_speech, speech_to_text
except Exception:  # keep the best-effort fallback, but let Ctrl-C / SystemExit through
    def text_to_speech(voice, text):
        """Stub: return the path of a static sample clip named after ``voice``."""
        return f"static/zh/{voice}.mp3"

    def speech_to_text(voice: str):
        """Stub: return a fixed ``(text, extra)`` pair regardless of the input path."""
        return "文本测试", ""
18
+
19
  description = """
20
  ## [接口文档](/docs)
21
  ## [效果演示](/)
 
26
 
27
  - 跨语言支持: 支持与训练数据集不同语言的推理,目前支持英语、日语和中文。
28
 
29
+ - 支持语音转文本/文本转语音
30
+
31
  """
32
  app = FastAPI(title="text to speech", description=description)
33
 
 
69
  return Response(audio_content, headers=headers)
70
 
71
 
@app.post("/stt")
async def stt(
    voice: UploadFile = File(...)
):
    """Transcribe an uploaded audio file.

    The upload is written to a temporary file because ``speech_to_text`` is
    called with a file path. The response is ``{"text": <transcription>}``.

    The handler was previously named ``tts``, which rebound the module-level
    name of the text-to-speech handler; the route path is unchanged.
    """
    contents = await voice.read()
    # Keep the uploaded file's extension on the temporary copy.
    suffix = os.path.splitext(voice.filename or "")[1]
    # Write into a temporary directory and close the file before handing its
    # path to the model: a still-open NamedTemporaryFile cannot be reopened by
    # name on Windows.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, f"upload{suffix}")
        with open(audio_path, "wb") as f:
            f.write(contents)
        # NOTE(review): this call runs synchronously inside an async handler.
        text, _ = speech_to_text(audio_path)
    return {"text": text}
82
+
83
+
class Demo:
    """Gradio demo page with a text-to-speech panel and a speech-to-text panel.

    Creating an instance builds the ``gr.Blocks`` layout, wires the two
    buttons to their handlers, and stores the result on ``self.page``.
    """
    title = "text to speech"
    description = description

    def __init__(self):
        # NOTE(review): nesting below is reconstructed from a flattened diff
        # view — confirm the intended Row/Column layout.
        with gr.Blocks(theme=gr.themes.Soft()) as self.page:
            with gr.Row():
                gr.Markdown(value=self.description)
            with gr.Row():
                # Left column: text -> speech.
                with gr.Column(scale=2):
                    with gr.Row():
                        text_tts = gr.Textbox(label="请输入需要转换的文本")
                    with gr.Row():
                        voice_tts = gr.Dropdown(
                            ["新闻小说主播-女士", "温柔女士"],
                            label="选择音色")
                    with gr.Row():
                        audio_tts = gr.Audio(
                            label="转换后的音频", type="filepath", scale=3)
                    with gr.Row():
                        button_tts = gr.Button(value="文本转语音")
                # Right column: speech -> text.
                with gr.Column(scale=2):
                    audio_stt = gr.Audio(
                        label="上传语音", type="filepath", scale=3)
                    with gr.Row():
                        # This button previously reused the text-to-speech
                        # label although it triggers speech-to-text.
                        button_stt = gr.Button(value="语音转文本")
                    text_stt = gr.Text(label="结果")
            # Event wiring.
            button_tts.click(self.click_run_button_tts, inputs=[
                voice_tts, text_tts], outputs=[audio_tts])
            button_stt.click(self.click_run_button_stt, inputs=[
                audio_stt], outputs=[text_stt])

    def click_run_button_tts(self, voice, text):
        """Return the audio file path produced by ``text_to_speech``."""
        wav_path = text_to_speech(voice=voice, text=text)
        return wav_path

    def click_run_button_stt(self, audio):
        """Return the transcription of the audio file at path ``audio``.

        Returns an empty string when no audio was provided.
        """
        if not audio:
            return ""
        # Positional call, matching the /stt endpoint; the real implementation
        # is an alias of ``transcribe`` whose parameter name is not visible here.
        text, _ = speech_to_text(audio)
        return text
124
+
125
+
126
+ gr.mount_gradio_app(app, Demo().page, path="/")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
if __name__ == '__main__':
    # Listen on the port given by the PORT environment variable (default 7860).
    listen_port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app="main:app", port=listen_port, host="0.0.0.0")
model.py CHANGED
@@ -761,3 +761,4 @@ def text_to_speech(voice, text):
761
  user_voice=voice, user_text=text, user_lang=language)
762
  return wav_path
763
 
 
 
761
  user_voice=voice, user_text=text, user_lang=language)
762
  return wav_path
763
 
764
+ speech_to_text = transcribe