File size: 682 Bytes
12a7531 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
import tempfile ,os
import gradio as gr
from transformers import VitsModel, AutoTokenizer,pipeline
import torch
import numpy as np
import torchaudio
# Load model directly
from transformers import AutoProcessor, AutoModelForCTC
# Hugging Face Hub checkpoint shared by the processor and the CTC model below.
_CHECKPOINT = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"

# Both objects are created once at import time from the same checkpoint.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = AutoModelForCTC.from_pretrained(_CHECKPOINT)
_ASR_MODEL_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"
_asr_pipe = None  # built lazily on the first request, then reused


def _get_asr_pipe():
    """Return the shared speech-recognition pipeline, creating it on first use."""
    global _asr_pipe
    if _asr_pipe is None:
        # Building the pipeline loads the model; do it once, not per request.
        _asr_pipe = pipeline("automatic-speech-recognition", model=_ASR_MODEL_ID)
    return _asr_pipe


def _to_mono_float32(samples):
    """Convert PCM samples to a 1-D float32 array.

    Integer PCM is rescaled by the dtype's maximum value; arrays with a
    second (channel) axis are averaged down to a single channel.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Expected layout here is (samples, channels).
        samples = samples.mean(axis=1)
    return samples


def ASR(audio):
    """Transcribe an audio clip to text.

    Args:
        audio: Either a path to an audio file, or a ``(sample_rate, samples)``
            pair where ``samples`` is a NumPy array of shape ``(n,)`` or
            ``(n, channels)``. ``None`` (nothing recorded/uploaded) is
            accepted as well.

    Returns:
        The recognised text as a string; an empty string when ``audio`` is
        ``None``.
    """
    if audio is None:
        return ""

    if isinstance(audio, (str, os.PathLike)):
        # torchaudio returns a (channels, frames) tensor plus the sample rate.
        waveform, sampling_rate = torchaudio.load(audio)
        samples = waveform.mean(dim=0).numpy()
    else:
        sampling_rate, raw = audio
        samples = _to_mono_float32(raw)

    # The pipeline takes raw audio as a dict so it knows the sample rate and
    # can resample to what the model expects.
    result = _get_asr_pipe()(
        {"raw": samples, "sampling_rate": int(sampling_rate)}
    )
    return result["text"]
# Web UI: a single audio input whose transcription is shown as text.
# The handler must be ASR -- no function named TTS exists in this file.
iface = gr.Interface(fn=ASR, inputs="audio", outputs="text")
iface.launch(share=False)