import functools
import os
import tempfile

import gradio as gr
import numpy as np
import torch
import torchaudio
from transformers import (
    AutoModelForCTC,
    AutoProcessor,
    AutoTokenizer,
    VitsModel,
    pipeline,
)

# Hugging Face Hub id of the Persian Wav2Vec2 checkpoint; the processor and the
# CTC model are both loaded from this same checkpoint.
_CHECKPOINT_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"

# Loaded once at import time.
model = AutoModelForCTC.from_pretrained(_CHECKPOINT_ID)
processor = AutoProcessor.from_pretrained(_CHECKPOINT_ID)


_ASR_MODEL_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"


@functools.lru_cache(maxsize=1)
def _get_asr_pipeline():
    """Build the speech-recognition pipeline on first use and reuse it afterwards."""
    return pipeline("automatic-speech-recognition", model=_ASR_MODEL_ID)


def _to_pipeline_input(audio):
    """Convert an audio value into the ``{"sampling_rate", "raw"}`` dict form.

    Args:
        audio: Either a path to an audio file, or a ``(sample_rate, samples)``
            pair such as the one Gradio's audio input produces in its default
            numpy mode.

    Returns:
        A dict with an integer ``"sampling_rate"`` and a mono float32
        ``"raw"`` waveform.
    """
    if isinstance(audio, (str, os.PathLike)):
        waveform, sample_rate = torchaudio.load(audio)
        # torchaudio yields (channels, frames); average the channels to mono.
        samples = waveform.mean(dim=0).numpy()
    else:
        sample_rate, samples = audio
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Integer PCM must be rescaled to the [-1, 1] float range. This has
            # to happen before down-mixing, which would already turn the data
            # into floats and hide the original integer width.
            samples = samples / float(np.iinfo(samples.dtype).max)
        if samples.ndim > 1:
            # Multi-channel arrays are laid out as (frames, channels).
            samples = samples.mean(axis=1)
    return {
        "sampling_rate": int(sample_rate),
        "raw": np.asarray(samples, dtype=np.float32),
    }


def ASR(audio):
    """Transcribe Persian speech to text.

    Args:
        audio: A path to an audio file, a ``(sample_rate, samples)`` pair, or
            ``None`` when nothing was recorded or uploaded.

    Returns:
        The recognised text as a string; an empty string when there is no
        audio to transcribe.
    """
    if audio is None:
        return ""
    pipeline_input = _to_pipeline_input(audio)
    if pipeline_input["raw"].size == 0:
        # Nothing to decode; skip loading/running the model entirely.
        return ""
    # The pipeline returns a dict; only the transcript is wanted by the caller.
    return _get_asr_pipeline()(pipeline_input)["text"]


# Web UI: one audio input wired to the ASR handler, one text output for the
# transcript. The handler must be ASR; no function named TTS exists here.
iface = gr.Interface(fn=ASR, inputs="audio", outputs="text")

# share=False: serve locally only, without creating a public share link.
iface.launch(share=False)