hosseinhimself committed on
Commit
fbfdbd2
1 Parent(s): 652ef7e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
+ import librosa
5
+
6
# Checkpoint identifier shared by the processor and the model.
model_name = "lnxdx/Wav2Vec2-Large-XLSR-Persian-ShEMO"

# Load the processor first, then the CTC model, both from the same checkpoint.
# Done once at import time so every transcription request reuses them.
processor, model = (
    loader.from_pretrained(model_name)
    for loader in (Wav2Vec2Processor, Wav2Vec2ForCTC)
)
10
+
11
+ # Function to transcribe audio
12
def transcribe(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path to the audio file to transcribe. May be ``None`` or an
            empty string when nothing was recorded.

    Returns:
        The decoded transcription string, or ``""`` when no audio was given.
    """
    # Submitting without a recording yields no file path; return an empty
    # transcription instead of crashing inside librosa.load.
    if not audio:
        return ""

    # The processor call below is told the waveform is sampled at 16 kHz, so
    # load the file at exactly that rate.
    sample_rate = 16000
    waveform, _ = librosa.load(audio, sr=sample_rate)

    # Preprocess the waveform into model input tensors.
    input_values = processor(
        waveform, return_tensors="pt", sampling_rate=sample_rate
    ).input_values

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy decoding: take the highest-scoring token id at each step, then
    # let the processor turn the id sequence of the single input into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0])
28
+
29
+ # Create the Gradio interface
30
# Build the microphone input in a way that works across Gradio versions.
# The legacy ``gr.inputs`` namespace was removed in Gradio 4, where the Audio
# component takes ``sources=[...]`` instead of ``source=...``. Keep the
# original call wherever that namespace still exists.
if hasattr(gr, "inputs"):
    audio_input = gr.inputs.Audio(source="microphone", type="filepath")
else:
    audio_input = gr.Audio(sources=["microphone"], type="filepath")

# Microphone recording in (passed to ``transcribe`` as a file path), text out.
iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Persian Speech Recognition",
    description="Transcribe Persian speech to text using Wav2Vec2-Large-XLSR-Persian-ShEMO model.",
)

# Launch the interface
iface.launch()