import gradio as gr | |
import numpy as np | |
from transformers import pipeline | |
pipeline_en = pipeline(task="text-to-speech", model="facebook/mms-tts-eng") | |
pipeline_pt = pipeline(task="text-to-speech", model="facebook/mms-tts-por") | |
def tts(lang): | |
pipeline = pipeline_en if lang == "en" else pipeline_pt | |
def tts_lang(txt): | |
res = pipeline(txt) | |
audio = (res['audio'].reshape(-1) * 2 ** 15).astype(np.int16) | |
return res['sampling_rate'], audio | |
return tts_lang | |
with gr.Blocks() as demo: | |
gr.Interface( | |
tts("en"), | |
inputs=gr.Textbox( | |
lines=1, | |
value="one two three four", | |
), | |
outputs="audio", | |
) | |
gr.Interface( | |
tts("pt"), | |
inputs=gr.Textbox( | |
lines=1, | |
value="um dois tres quatro", | |
), | |
outputs="audio", | |
) | |
if __name__ == "__main__": | |
demo.launch() | |