|
import tempfile ,os |
|
import gradio as gr |
|
from transformers import VitsModel, AutoTokenizer,pipeline |
|
import torch |
|
import numpy as np |
|
import torchaudio |
|
|
|
|
|
# Hugging Face model id used for both the VITS weights and the tokenizer.
_MODEL_ID = "SeyedAli/Persian-Speech-synthesis"

# Text-to-speech pipeline, built on first use and then shared by all requests
# so the model is not reloaded for every call.
_tts_pipeline = None


def _get_tts_pipeline():
    """Return the shared text-to-speech pipeline, building it on first use."""
    global _tts_pipeline
    if _tts_pipeline is None:
        model = VitsModel.from_pretrained(_MODEL_ID)
        tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
        _tts_pipeline = pipeline(
            "text-to-speech", model=model, tokenizer=tokenizer
        )
    return _tts_pipeline


def TTS(text):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Args:
        text: The text to synthesize.

    Returns:
        Path of a newly created temporary ``.wav`` file. The file is created
        with ``delete=False`` and is not removed by this function.
    """
    # Run inference exactly once and take both the samples and the sampling
    # rate from that single result.
    result = _get_tts_pipeline()(text)

    # The pipeline returns the samples as an array, while torchaudio.save
    # needs a 2-D (channels, frames) tensor. Flattening to a single channel
    # assumes mono output for a single input text -- TODO confirm if the
    # model is ever swapped for a multi-channel one.
    waveform = torch.as_tensor(result["audio"], dtype=torch.float32)
    waveform = waveform.reshape(1, -1)

    # Only reserve a unique file name here; closing the handle before writing
    # lets torchaudio open the path itself and infer the format from ".wav".
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name

    torchaudio.save(wav_path, waveform, sample_rate=result["sampling_rate"])
    return wav_path
|
# Gradio UI: one text box in, one audio player out, backed by TTS().
iface = gr.Interface(
    fn=TTS,
    inputs="text",
    outputs="audio",
)

# Start the web server without creating a public share link.
iface.launch(share=False)