"""Gradio demo that synthesizes Persian speech from text with a VITS model."""

import functools
import os
import tempfile

import gradio as gr
import numpy as np
import torch
import torchaudio
from transformers import VitsModel, AutoTokenizer, pipeline

# Hugging Face Hub identifier of the checkpoint used for synthesis.
MODEL_ID = "SeyedAli/Persian-Speech-synthesis"


@functools.lru_cache(maxsize=1)
def _get_pipeline():
    """Build the text-to-speech pipeline once and reuse it across requests.

    Loading the model and tokenizer is expensive, so the result is cached
    instead of being rebuilt on every call to ``TTS``.
    """
    model = VitsModel.from_pretrained(MODEL_ID)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    return pipeline("text-to-speech", model=model, tokenizer=tokenizer)


def TTS(text):
    """Synthesize ``text`` to speech and return the path of a WAV file.

    Args:
        text: The text to speak.

    Returns:
        Path to a temporary ``.wav`` file containing the synthesized audio.
        The file is created with ``delete=False`` and is not removed here,
        so that the caller (Gradio) can read it after this function returns.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Run inference exactly once; the result holds both audio and sample rate.
    result = _get_pipeline()(text)

    # torchaudio.save needs a 2-D (channels, samples) tensor, while the
    # pipeline hands back a NumPy array.
    waveform = torch.from_numpy(np.asarray(result["audio"], dtype=np.float32))
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)

    # Reserve a unique file name, then close the handle before writing so the
    # save goes through the path (the format is inferred from the suffix).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name

    torchaudio.save(wav_path, waveform, sample_rate=int(result["sampling_rate"]))
    return wav_path


iface = gr.Interface(fn=TTS, inputs="text", outputs="audio")
iface.launch(share=False)