Spaces:

akhaliq
/

Real-Time-Voice-Cloning

Runtime error

App Files Files Community

radames committed on Feb 7, 2023

Commit

593d65c

•

1 Parent(s): 7967068

Fix demo and freeze requirements and UI improvement

Browse files

This fixes the demo:
* the model files are now hosted on a Google Drive from the original author
* Microphone input alternative
* Update Gradio

Files changed (4) hide show

README.md +2 -0
app.py +48 -9
demo_cli.py +4 -1
requirements.txt +19 -16

README.md CHANGED Viewed

@@ -5,7 +5,9 @@ colorFrom: blue
 colorTo: red
 sdk: gradio
 app_file: app.py
 pinned: false
 ---
 # Configuration

 colorTo: red
 sdk: gradio
 app_file: app.py
+sdk_version: 3.17.1
 pinned: false
+duplicated_from: akhaliq/Real-Time-Voice-Cloning
 ---
 # Configuration

app.py CHANGED Viewed

@@ -1,22 +1,61 @@
 import gradio as gr
 import os
 import shlex
-os.system('wget https://www.dropbox.com/s/luro5o8kjotkn70/synpretrained.pt')
-os.system('wget https://www.dropbox.com/s/dv0ymnlqillecfw/encpretrained.pt')
-os.system('wget https://www.dropbox.com/s/aiym2qfv7087bsc/vocpretrained.pt')
-os.system('ls')
-def inference(audio, text):
-    os.system("python demo_cli.py --no_sound --cpu --audio_path "+audio.name+" --text "+shlex.quote(text.strip()))
-    return 'demo_output_1.wav'
 title = "Real-Time-Voice-Cloning"
 description = "Gradio demo for Real-Time-Voice-Cloning: Clone a voice in 5 seconds to generate arbitrary speech in real-time. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://matheo.uliege.be/handle/2268.2/6801' target='_blank'>Real-Time Voice Cloning</a> | <a href='https://github.com/CorentinJ/Real-Time-Voice-Cloning' target='_blank'>Github Repo</a></p>"
-examples=[['test.wav',"This is real time voice cloning on huggingface spaces"]]
-gr.Interface(inference, inputs=[gr.inputs.Audio(type="file"),"text"], outputs=gr.outputs.Audio(type="file"),enable_queue=True,title=title,description=description,article=article, examples=examples).launch()

 import gradio as gr
 import os
 import shlex
+import gdown
+import uuid
+import torch
+cpu_param = "--cpu" if not torch.cuda.is_available() else ""
+if (not os.path.exists("synpretrained.pt")):
+    gdown.download("https://drive.google.com/u/0/uc?id=1EqFMIbvxffxtjiVrtykroF6_mUh-5Z3s&export=download&confirm=t",
+                   "synpretrained.pt", quiet=False)
+    gdown.download("https://drive.google.com/uc?export=download&id=1q8mEGwCkFy23KZsinbuvdKAQLqNKbYf1",
+                   "encpretrained.pt", quiet=False)
+    gdown.download("https://drive.google.com/uc?export=download&id=1cf2NO6FtI0jDuy8AV3Xgn6leO6dHjIgu",
+                   "vocpretrained.pt", quiet=False)
+def inference(audio_path, text, mic_path=None):
+    if mic_path:
+        audio_path = mic_path
+    output_path = f"/tmp/output_{uuid.uuid4()}.wav"
+    os.system(
+        f"python demo_cli.py --no_sound {cpu_param} --audio_path {audio_path} --text {shlex.quote(text.strip())} --output_path {output_path}")
+    return output_path
 title = "Real-Time-Voice-Cloning"
 description = "Gradio demo for Real-Time-Voice-Cloning: Clone a voice in 5 seconds to generate arbitrary speech in real-time. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
 article = "<p style='text-align: center'><a href='https://matheo.uliege.be/handle/2268.2/6801' target='_blank'>Real-Time Voice Cloning</a> | <a href='https://github.com/CorentinJ/Real-Time-Voice-Cloning' target='_blank'>Github Repo</a></p>"
+examples = [['test.wav', "This is real time voice cloning on huggingface spaces"]]
+def toggle(choice):
+    if choice == "mic":
+        return gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True)
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            radio = gr.Radio(["mic", "file"], value="mic",
+                             label="How would you like to upload your audio?")
+            mic_input = gr.Mic(label="Input", type="filepath", visible=False)
+            audio_file = gr.Audio(
+                type="filepath", label="Input", visible=True)
+            text_input = gr.Textbox(label="Text")
+        with gr.Column():
+            audio_output = gr.Audio(label="Output")
+    gr.Examples(examples, fn=inference, inputs=[audio_file, text_input],
+                      outputs=audio_output, cache_examples=True)
+    btn = gr.Button("Generate")
+    btn.click(inference, inputs=[audio_file,
+              text_input, mic_input], outputs=audio_output)
+    radio.change(toggle, radio, [mic_input, audio_file])
+demo.launch(enable_queue=True)

demo_cli.py CHANGED Viewed

@@ -14,6 +14,7 @@ import sys
 import os
 from audioread.exceptions import NoBackendError
 if __name__ == '__main__':
     ## Info & args
     parser = argparse.ArgumentParser(
@@ -35,6 +36,8 @@ if __name__ == '__main__':
     parser.add_argument("-audio", "--audio_path", type=Path, required = True,
                         help="Path to a audio file")
     parser.add_argument("--text", type=str, required = True, help="Text Input")
     args = parser.parse_args()
     print_args(args, parser)
     if not args.no_sound:
@@ -197,7 +200,7 @@ if __name__ == '__main__':
     generated_wav = encoder.preprocess_wav(generated_wav)
     # Save it on the disk
-    filename = "demo_output_1.wav"
     print(generated_wav.dtype)
     sf.write(filename, generated_wav.astype(np.float32), synthesizer.sample_rate)
     print("\nSaved output as %s\n\n" % filename)

 import os
 from audioread.exceptions import NoBackendError
 if __name__ == '__main__':
     ## Info & args
     parser = argparse.ArgumentParser(
     parser.add_argument("-audio", "--audio_path", type=Path, required = True,
                         help="Path to a audio file")
     parser.add_argument("--text", type=str, required = True, help="Text Input")
+    parser.add_argument("--output_path", type=str, required = True, help="output file path")
     args = parser.parse_args()
     print_args(args, parser)
     if not args.no_sound:
     generated_wav = encoder.preprocess_wav(generated_wav)
     # Save it on the disk
+    filename = args.output_path
     print(generated_wav.dtype)
     sf.write(filename, generated_wav.astype(np.float32), synthesizer.sample_rate)
     print("\nSaved output as %s\n\n" % filename)

requirements.txt CHANGED Viewed

@@ -1,16 +1,19 @@
-umap-learn
-visdom
-librosa>=0.8.0
-matplotlib>=3.3.0
-numpy==1.19.3; platform_system == "Windows"
-numpy==1.19.4; platform_system != "Windows"
-scipy>=1.0.0
-tqdm
-sounddevice
-SoundFile
-Unidecode
-inflect
-multiprocess
-numba
-webrtcvad; platform_system != "Windows"
-torch

+inflect==5.3.0
+librosa==0.8.1
+matplotlib==3.5.1
+numpy
+Pillow==8.4.0
+PyQt5==5.15.6
+scikit-learn==1.0.2
+scipy==1.7.3
+sounddevice==0.4.3
+SoundFile==0.10.3.post1
+tqdm==4.62.3
+umap-learn==0.5.2
+Unidecode==1.3.2
+urllib3==1.26.7
+visdom==0.1.8.9
+webrtcvad==2.0.10
+gradio==3.17.1
+gdown
+torch