import os
import tempfile

import gradio as gr
import numpy as np
import torch
import torchaudio
from transformers import AutoTokenizer, VitsModel, pipeline

# Load the pretrained checkpoint and its tokenizer once, at import time,
# so every request reuses the same objects.
model = VitsModel.from_pretrained("SeyedAli/Persian-Speech-synthesis")
tokenizer = AutoTokenizer.from_pretrained("SeyedAli/Persian-Speech-synthesis")

# Right-aligned, right-to-left text box for the input text.
text_input = gr.TextArea(
    label="متن فارسی", text_align="right", rtl=True, type="text"
)
# The handler returns a file path, so the audio component is configured
# with type="filepath".
audio_output = gr.Audio(label="صوت گفتار فارسی", type="filepath")


def TTS(text):
    """Synthesize speech for *text* and return the path of a WAV file.

    Args:
        text: The text to synthesize, as received from the text box.

    Returns:
        The path of a newly created ``.wav`` temporary file containing the
        synthesized waveform, or ``None`` when *text* is missing or blank.
        The file is created with ``delete=False`` and is not removed by
        this function.
    """
    # Nothing to synthesize: return no audio rather than handing an empty
    # or missing string to the tokenizer and model.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt")
    # Inference only; no gradients are needed.
    with torch.no_grad():
        output = model(**inputs).waveform

    # Reserve a unique path and close the handle *before* writing: a named
    # temporary file cannot be reopened by name while still open on
    # Windows, and saving by path does not depend on the audio backend
    # supporting file-like objects.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        wav_path = fp.name

    # NOTE(review): assumes `output` is 2-D (batch, samples) with a single
    # item, which torchaudio then writes as one channel — confirm.
    torchaudio.save(
        wav_path, output, model.config.sampling_rate, format="wav"
    )
    return wav_path


iface = gr.Interface(fn=TTS, inputs=text_input, outputs=audio_output)
# Launched at module level (as in the original script); no public share link.
iface.launch(share=False)