SpeakerCreater / app.py
CazC's picture
Add text input and generate audio output
c607d95
raw
history blame contribute delete
No virus
1.02 kB
import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
import torch.nn.functional as F
from whisperspeech.pipeline import Pipeline
import time
def process_audio(audio_elem, text="This is a test voice genereation"):
    """Extract a speaker embedding from a reference clip and speak `text` with it.

    Args:
        audio_elem: ``(sample_rate, samples)`` pair for the reference clip;
            the first element is handed to ``scipy.io.wavfile.write`` as the
            rate and the second as the sample data.
        text: Sentence to synthesize with the extracted speaker embedding.

    Returns:
        A ``(embedding_path, audio_path)`` tuple. ``embedding_path`` is the
        compressed ``.npz`` archive holding the embedding under the key
        ``features``. ``audio_path`` is the generated WAV file, or ``None``
        when synthesis failed.

    Raises:
        ValueError: If no reference audio was supplied.
    """
    if audio_elem is None:
        # Fail with an actionable message instead of a TypeError on indexing.
        raise ValueError(
            "No reference audio provided: upload or record a clip first."
        )
    sample_rate, samples = audio_elem

    # scipy writes WAV data, so the file gets a matching extension; a ".mp3"
    # name would contradict the content given to the audio loader.
    reference_path = "reference.wav"
    scipy.io.wavfile.write(reference_path, sample_rate, samples)

    # NOTE(review): the pipeline is rebuilt on every call. Reusing a single
    # instance would likely be faster, but confirm it is safe to share
    # between concurrent requests before caching it.
    pipe = Pipeline(s2a_ref='collabora/whisperspeech:s2a-q4-base-en+pl.model')
    speaker = pipe.extract_spk_emb(reference_path)

    # Persist a CPU/NumPy copy of the embedding; savez_compressed appends
    # ".npz", which yields the "speaker.npz" path returned below.
    np.savez_compressed("speaker", features=speaker.cpu().numpy())

    output_path = "test.wav"
    try:
        pipe.generate_to_file(output_path, text, lang='en', cps=10.5, speaker=speaker)
    except Exception as e:
        # Best effort: the embedding is still returned, but never hand back a
        # missing or stale audio file left over from an earlier call.
        print("Error: ", e)
        output_path = None
    return "speaker.npz", output_path
# Define the Gradio interface: a reference audio clip and a text prompt go in;
# the speaker-embedding file and the synthesized audio come out.
iface = gr.Interface(
    fn=process_audio,
    inputs=["audio", "text"],
    outputs=["file", "audio"],
)

# Build first, then launch: the finished Interface does not need to be entered
# as a context manager in order to be served.
iface.launch()