"""Gradio demo that synthesizes Persian speech from text with a VITS model."""

import functools
import os
import tempfile

import gradio as gr
import numpy as np
import scipy
import torch
from transformers import AutoTokenizer, VitsModel, pipeline

# Hugging Face Hub identifier shared by the model and its tokenizer.
MODEL_ID = "SeyedAli/Persian-Speech-synthesis"


@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the VITS model and tokenizer once and reuse them across requests.

    Loading is deferred to the first call (as in the original script, nothing
    is loaded at import time) and cached afterwards so that each request
    does not reload the checkpoint.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = VitsModel.from_pretrained(MODEL_ID)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    return model, tokenizer


def TTS(text):
    """Synthesize speech for *text* and return the raw waveform tensor.

    Args:
        text: The text to synthesize.

    Returns:
        The ``waveform`` tensor produced by the model. Its sampling rate is
        ``model.config.sampling_rate``.
    """
    model, tokenizer = _load_model()
    inputs = tokenizer(text, return_tensors="pt")
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        return model(**inputs).waveform


def _tts_audio(text):
    """Adapt :func:`TTS` to the ``(sample_rate, samples)`` pair Gradio plays.

    Args:
        text: The text to synthesize.

    Returns:
        A ``(sampling_rate, int16_samples)`` tuple for an audio output.
    """
    waveform = TTS(text)
    model, _ = _load_model()
    # NOTE(review): assumes the waveform is (batch, samples) with a single
    # batch entry, as described in the VitsModel documentation — confirm.
    samples = waveform.squeeze(0).cpu().numpy()
    # NOTE(review): assumes float samples in [-1, 1]; clipping guards the
    # int16 conversion against overflow if a value falls outside that range.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    return model.config.sampling_rate, pcm


# The output is audio, not text: a "text" output would only display the
# string form of the waveform tensor instead of something playable.
iface = gr.Interface(fn=_tts_audio, inputs="text", outputs="audio")
iface.launch(share=False)