radames committed
Commit 593d65c
Parent: 7967068

Fix demo, freeze requirements, and improve the UI


This fixes the demo:
* The model files are now downloaded from the original author's Google Drive
* A microphone input is offered as an alternative to file upload
* Gradio is updated and pinned to 3.17.1

Files changed (4):
  1. README.md (+2, -0)
  2. app.py (+48, -9)
  3. demo_cli.py (+4, -1)
  4. requirements.txt (+19, -16)
README.md CHANGED
@@ -5,7 +5,9 @@ colorFrom: blue
 colorTo: red
 sdk: gradio
 app_file: app.py
+sdk_version: 3.17.1
 pinned: false
+duplicated_from: akhaliq/Real-Time-Voice-Cloning
 ---
 
 # Configuration
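
Note: in a Space's README front matter, sdk_version pins the Gradio release the Space is built with, and duplicated_from records the Space this one was duplicated from.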
app.py CHANGED
@@ -1,22 +1,61 @@
 import gradio as gr
 import os
 import shlex
+import gdown
+import uuid
+import torch
 
-os.system('wget https://www.dropbox.com/s/luro5o8kjotkn70/synpretrained.pt')
-os.system('wget https://www.dropbox.com/s/dv0ymnlqillecfw/encpretrained.pt')
-os.system('wget https://www.dropbox.com/s/aiym2qfv7087bsc/vocpretrained.pt')
-os.system('ls')
+cpu_param = "--cpu" if not torch.cuda.is_available() else ""
 
-def inference(audio, text):
-    os.system("python demo_cli.py --no_sound --cpu --audio_path "+audio.name+" --text "+shlex.quote(text.strip()))
-    return 'demo_output_1.wav'
+if (not os.path.exists("synpretrained.pt")):
+    gdown.download("https://drive.google.com/u/0/uc?id=1EqFMIbvxffxtjiVrtykroF6_mUh-5Z3s&export=download&confirm=t",
+                   "synpretrained.pt", quiet=False)
+    gdown.download("https://drive.google.com/uc?export=download&id=1q8mEGwCkFy23KZsinbuvdKAQLqNKbYf1",
+                   "encpretrained.pt", quiet=False)
+    gdown.download("https://drive.google.com/uc?export=download&id=1cf2NO6FtI0jDuy8AV3Xgn6leO6dHjIgu",
+                   "vocpretrained.pt", quiet=False)
+
+
+def inference(audio_path, text, mic_path=None):
+    if mic_path:
+        audio_path = mic_path
+    output_path = f"/tmp/output_{uuid.uuid4()}.wav"
+    os.system(
+        f"python demo_cli.py --no_sound {cpu_param} --audio_path {audio_path} --text {shlex.quote(text.strip())} --output_path {output_path}")
+    return output_path
 
 
 title = "Real-Time-Voice-Cloning"
 description = "Gradio demo for Real-Time-Voice-Cloning: Clone a voice in 5 seconds to generate arbitrary speech in real-time. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://matheo.uliege.be/handle/2268.2/6801' target='_blank'>Real-Time Voice Cloning</a> | <a href='https://github.com/CorentinJ/Real-Time-Voice-Cloning' target='_blank'>Github Repo</a></p>"
 
-examples=[['test.wav',"This is real time voice cloning on huggingface spaces"]]
-gr.Interface(inference, inputs=[gr.inputs.Audio(type="file"),"text"], outputs=gr.outputs.Audio(type="file"),enable_queue=True,title=title,description=description,article=article, examples=examples).launch()
+examples = [['test.wav', "This is real time voice cloning on huggingface spaces"]]
+
+
+def toggle(choice):
+    if choice == "mic":
+        return gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True)
+
+
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            radio = gr.Radio(["mic", "file"], value="mic",
+                             label="How would you like to upload your audio?")
+            mic_input = gr.Mic(label="Input", type="filepath", visible=False)
+            audio_file = gr.Audio(
+                type="filepath", label="Input", visible=True)
+            text_input = gr.Textbox(label="Text")
+        with gr.Column():
+            audio_output = gr.Audio(label="Output")
+
+    gr.Examples(examples, fn=inference, inputs=[audio_file, text_input],
+                outputs=audio_output, cache_examples=True)
+    btn = gr.Button("Generate")
+    btn.click(inference, inputs=[audio_file,
+                                 text_input, mic_input], outputs=audio_output)
+    radio.change(toggle, radio, [mic_input, audio_file])
 
+demo.launch(enable_queue=True)
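
A caveat on the inference function above: audio_path and output_path are interpolated into the shell command without quoting, so a path containing spaces would break the call. A minimal sketch of a safer invocation (not part of this commit; run_demo_cli is a hypothetical helper) that passes an argument list to subprocess instead of a shell string:

import subprocess

def run_demo_cli(audio_path, text, output_path, use_cpu=True):
    # An argument list is passed straight to the process (no shell),
    # so paths and text with spaces or metacharacters need no quoting.
    cmd = ["python", "demo_cli.py", "--no_sound",
           "--audio_path", str(audio_path),
           "--text", text.strip(),
           "--output_path", output_path]
    if use_cpu:
        cmd.append("--cpu")
    subprocess.run(cmd, check=True)  # raises CalledProcessError on failure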
demo_cli.py CHANGED
@@ -14,6 +14,7 @@ import sys
 import os
 from audioread.exceptions import NoBackendError
 
+
 if __name__ == '__main__':
     ## Info & args
     parser = argparse.ArgumentParser(
@@ -35,6 +36,8 @@ if __name__ == '__main__':
     parser.add_argument("-audio", "--audio_path", type=Path, required = True,
                         help="Path to a audio file")
     parser.add_argument("--text", type=str, required = True, help="Text Input")
+    parser.add_argument("--output_path", type=str, required = True, help="output file path")
+
     args = parser.parse_args()
     print_args(args, parser)
     if not args.no_sound:
@@ -197,7 +200,7 @@ if __name__ == '__main__':
     generated_wav = encoder.preprocess_wav(generated_wav)
 
     # Save it on the disk
-    filename = "demo_output_1.wav"
+    filename = args.output_path
     print(generated_wav.dtype)
     sf.write(filename, generated_wav.astype(np.float32), synthesizer.sample_rate)
     print("\nSaved output as %s\n\n" % filename)
requirements.txt CHANGED
@@ -1,16 +1,19 @@
-umap-learn
-visdom
-librosa>=0.8.0
-matplotlib>=3.3.0
-numpy==1.19.3; platform_system == "Windows"
-numpy==1.19.4; platform_system != "Windows"
-scipy>=1.0.0
-tqdm
-sounddevice
-SoundFile
-Unidecode
-inflect
-multiprocess
-numba
-webrtcvad; platform_system != "Windows"
-torch
+inflect==5.3.0
+librosa==0.8.1
+matplotlib==3.5.1
+numpy
+Pillow==8.4.0
+PyQt5==5.15.6
+scikit-learn==1.0.2
+scipy==1.7.3
+sounddevice==0.4.3
+SoundFile==0.10.3.post1
+tqdm==4.62.3
+umap-learn==0.5.2
+Unidecode==1.3.2
+urllib3==1.26.7
+visdom==0.1.8.9
+webrtcvad==2.0.10
+gradio==3.17.1
+gdown
+torch
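
Since the pins above are meant to freeze the demo's environment, a quick standard-library sketch to spot-check that the installed versions match (package selection arbitrary):

from importlib.metadata import version

# Spot-check a few pins from requirements.txt above.
for pkg, expected in [("gradio", "3.17.1"), ("librosa", "0.8.1"), ("scipy", "1.7.3")]:
    installed = version(pkg)
    status = "OK" if installed == expected else f"MISMATCH (expected {expected})"
    print(f"{pkg}=={installed}  {status}")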