iflamed committed
Commit fff6f9f (1 parent: 4e43a9d)

add download models script and fastapi server to serve tts

Files changed (4)
  1. README.md +12 -8
  2. download.py +6 -0
  3. main.py +40 -0
  4. requirements.txt +3 -1
README.md CHANGED
@@ -37,17 +37,13 @@ We strongly recommend that you download our pretrained `CosyVoice-300M` `CosyVoi
  
  If you are an expert in this field, and you are only interested in training your own CosyVoice model from scratch, you can skip this step.
  
- ``` python
- # Download models via the ModelScope SDK
- from modelscope import snapshot_download
- snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
- snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
- snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
- snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
+ Download models with the Python script.
+ ``` shell
+ python download.py
  ```
  
+ Download models with git; install `git lfs` first.
  ``` sh
- # Download models with git; make sure git lfs is installed first.
  mkdir -p pretrained_models
  git clone https://www.modelscope.cn/iic/CosyVoice-300M.git pretrained_models/CosyVoice-300M
  git clone https://www.modelscope.cn/iic/CosyVoice-300M-SFT.git pretrained_models/CosyVoice-300M-SFT
@@ -120,6 +116,14 @@ python3 webui.py --port 50000 --model_dir pretrained_models/CosyVoice-300M
  For advanced users, we have provided train and inference scripts in `examples/libritts/cosyvoice/run.sh`.
  You can get familiar with CosyVoice following this recipe.
  
+ **Serve with FastAPI**
+ ```sh
+ # For development
+ fastapi dev --port 3003
+ # For production
+ fastapi run --port 3003
+ ```
+
  **Build for deployment**
  
  Optionally, if you want to use grpc for service deployment,
download.py ADDED
@@ -0,0 +1,6 @@
+ # Download models via the ModelScope SDK
+ from modelscope import snapshot_download
+ snapshot_download('iic/CosyVoice-300M', local_dir='pretrained_models/CosyVoice-300M')
+ snapshot_download('iic/CosyVoice-300M-SFT', local_dir='pretrained_models/CosyVoice-300M-SFT')
+ snapshot_download('iic/CosyVoice-300M-Instruct', local_dir='pretrained_models/CosyVoice-300M-Instruct')
+ snapshot_download('iic/CosyVoice-ttsfrd', local_dir='pretrained_models/CosyVoice-ttsfrd')
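
As a quick check, the sketch below (not part of this commit) loads the downloaded SFT model the same way `main.py` does and prints the available speakers; it assumes `python download.py` has already completed.

```python
# Sanity-check sketch: assumes download.py has populated pretrained_models/.
# It reuses the same calls that main.py makes below.
from cosyvoice.cli.cosyvoice import CosyVoice

cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
print(cosyvoice.list_avaliable_spks())  # speaker names (method name as spelled in the CosyVoice API)
```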
main.py ADDED
@@ -0,0 +1,40 @@
+ import io, time
+ from fastapi import FastAPI, Response
+ from fastapi.responses import HTMLResponse
+ from cosyvoice.cli.cosyvoice import CosyVoice
+ import torchaudio
+
+ cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
+ # sft usage
+ print(cosyvoice.list_avaliable_spks())
+ app = FastAPI()
+
+ @app.get("/api/voice/tts")
+ async def tts(query: str, role: str):
+     start = time.process_time()
+     output = cosyvoice.inference_sft(query, role)
+     end = time.process_time()
+     print("infer time:", end - start, "seconds")
+     buffer = io.BytesIO()
+     torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
+     buffer.seek(0)
+     return Response(content=buffer.read(-1), media_type="audio/wav")
+
+ @app.get("/api/voice/roles")
+ async def roles():
+     return {"roles": cosyvoice.list_avaliable_spks()}
+
+ @app.get("/", response_class=HTMLResponse)
+ async def root():
+     return """
+     <!DOCTYPE html>
+     <html lang=zh-cn>
+     <head>
+         <meta charset=utf-8>
+         <title>Api information</title>
+     </head>
+     <body>
+         Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
+     </body>
+     </html>
+     """
requirements.txt CHANGED
@@ -25,4 +25,6 @@ soundfile==0.12.1
  tensorboard==2.14.0
  torch==2.0.1
  torchaudio==2.0.2
- wget==3.2
+ wget==3.2
+ fastapi==0.111.0
+ fastapi-cli==0.0.4