# File size: 682 Bytes
# Revision: 12a7531
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import tempfile ,os
import gradio as gr
from transformers import VitsModel, AutoTokenizer,pipeline
import torch
import numpy as np
import torchaudio

# Load model directly
from transformers import AutoProcessor, AutoModelForCTC

# Hub checkpoint shared by the processor and the CTC model loaded below.
_CHECKPOINT = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"

# Both objects are fetched eagerly at import time.
# NOTE(review): nothing visible in this file references `processor` or
# `model`; ASR builds its own pipeline from the checkpoint id -- confirm
# whether these eager loads are still needed.
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = AutoModelForCTC.from_pretrained(_CHECKPOINT)

_ASR_MODEL_ID = "SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1"
_asr_pipeline = None


def _get_asr_pipeline():
    """Return the shared speech-recognition pipeline, building it on first use."""
    global _asr_pipeline
    if _asr_pipeline is None:
        # Building the pipeline loads the model weights, so do it once
        # instead of on every request.
        _asr_pipeline = pipeline("automatic-speech-recognition", model=_ASR_MODEL_ID)
    return _asr_pipeline


def _to_pipeline_input(audio):
    """Convert a Gradio audio value into something the ASR pipeline accepts."""
    if isinstance(audio, (tuple, list)):
        # Gradio's default "audio" input yields (sample_rate, samples).
        sample_rate, samples = audio
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Scale integer PCM into [-1, 1] floats before any averaging,
            # while the original integer dtype is still known.
            scale = float(np.iinfo(samples.dtype).max)
            samples = samples.astype(np.float32) / scale
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            # Down-mix (samples, channels) to mono; the pipeline expects
            # single-channel audio.
            samples = samples.mean(axis=-1)
        return {"sampling_rate": int(sample_rate), "raw": samples}
    # Otherwise assume a file path (or raw bytes / ndarray), which the
    # pipeline can decode by itself.
    return audio


def ASR(audio):
    """Transcribe Persian speech to text.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple as produced by
            Gradio's default audio input, or a path to an audio file.
            ``None`` (nothing recorded or uploaded) is accepted.

    Returns:
        The transcription as a plain string; ``""`` when ``audio`` is None.
    """
    if audio is None:
        return ""
    result = _get_asr_pipeline()(_to_pipeline_input(audio))
    # The pipeline returns a dict; the text output only wants the string.
    return result["text"]
# Web UI: audio in, transcribed text out. The callback must be ASR, the only
# handler defined in this file; the earlier `TTS` reference was an undefined
# name and raised NameError before the app could start.
iface = gr.Interface(fn=ASR, inputs="audio", outputs="text")
iface.launch(share=False)