SeyedAli's picture
Rename app.txt to app.py
12a7531
raw
history blame
682 Bytes
import tempfile ,os
import gradio as gr
from transformers import VitsModel, AutoTokenizer,pipeline
import torch
import numpy as np
import torchaudio
# Load model directly
from transformers import AutoProcessor, AutoModelForCTC
# Eagerly load the processor and the CTC checkpoint when the module is
# imported. Both calls resolve the same Hub repository, so the identifier is
# spelled out once. NOTE(review): neither `processor` nor `model` is
# referenced anywhere else in the visible file.
_CHECKPOINT_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"
processor = AutoProcessor.from_pretrained(_CHECKPOINT_ID)
model = AutoModelForCTC.from_pretrained(_CHECKPOINT_ID)
_ASR_MODEL_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the shared speech-recognition pipeline, building it on first use."""
    global _asr_pipeline
    if _asr_pipeline is None:
        # Constructing the pipeline loads the model weights, so do it once
        # and reuse it instead of rebuilding it for every request.
        _asr_pipeline = pipeline(
            "automatic-speech-recognition", model=_ASR_MODEL_ID
        )
    return _asr_pipeline


def _to_pipeline_input(audio):
    """Convert a Gradio audio value into the dict form the pipeline accepts.

    Args:
        audio: Either a path to an audio file, or a
            ``(sampling_rate, samples)`` tuple where ``samples`` is an array
            shaped ``(frames,)`` or ``(frames, channels)``.

    Returns:
        A dict with ``"sampling_rate"`` (int) and ``"raw"`` (1-D float32
        array holding the mono waveform).
    """
    if isinstance(audio, (str, os.PathLike)):
        # torchaudio.load yields (channels, frames); transpose so both input
        # kinds share the (frames, channels) layout handled below.
        waveform, sampling_rate = torchaudio.load(audio)
        samples = waveform.numpy().T
    else:
        sampling_rate, samples = audio
        samples = np.asarray(samples)

    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM (e.g. int16) is rescaled to roughly [-1.0, 1.0].
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    if samples.ndim > 1:
        # Down-mix multi-channel audio to mono by averaging the channels.
        samples = samples.mean(axis=1)

    return {"sampling_rate": int(sampling_rate), "raw": samples}


def ASR(audio):
    """Transcribe a recorded or uploaded audio clip to text.

    Args:
        audio: The value delivered by the audio input: a file path, a
            ``(sampling_rate, samples)`` tuple, or ``None`` when nothing was
            submitted.

    Returns:
        The recognized text as a string; an empty string when ``audio`` is
        ``None``.
    """
    if audio is None:
        return ""
    result = _get_asr_pipeline()(_to_pipeline_input(audio))
    # The pipeline returns a dict; a text output only needs the transcript.
    return result["text"]
# Web UI: a single audio input wired to the transcription function, with the
# result shown as text. The handler must be ASR; the file defines no TTS
# function, so referencing one raises NameError at startup.
iface = gr.Interface(fn=ASR, inputs="audio", outputs="text")
# share=False: do not request a public share link when launching.
iface.launch(share=False)