# app.py — Whisper Small Khmer speech-to-text Gradio demo
# (Hugging Face Space by Vira21, revision 72aef28)
import gradio as gr
from transformers import pipeline
import torch
# Speech-recognition pipeline: Whisper Small fine-tuned for Khmer.
MODEL_NAME = "Vira21/Whisper-Small-Khmer"

# Run on the first CUDA device when one is available; -1 selects the CPU.
_device = 0 if torch.cuda.is_available() else -1

whisper_pipeline = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=_device,
)
def transcribe_audio(audio):
    """Transcribe an uploaded or recorded audio clip to text.

    Parameters
    ----------
    audio : str | None
        Filesystem path supplied by the Gradio ``Audio(type="filepath")``
        component, or ``None`` when the user submits without any audio.

    Returns
    -------
    str
        The transcription, or a human-readable error message (the text
        output component displays either one).
    """
    # Gradio passes None when no file/recording was provided; fail fast
    # with a friendly message instead of letting the pipeline raise a
    # confusing low-level error.
    if audio is None:
        return "Please provide an audio file or recording."
    try:
        return whisper_pipeline(audio)["text"]
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash.
        return f"An error occurred during transcription: {str(e)}"
# UI definition: one audio input mapped to a single text output.
_EXAMPLES = [
    ["Example Audio/126.wav"],
    ["Example Audio/232.wav"],
    ["Example Audio/tomholland28282.wav"],
]

interface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper Small Khmer-Eng Speech-to-Text",
    description=(
        "Upload an audio file or record your voice to get the "
        "transcription in Khmer-English."
    ),
    examples=_EXAMPLES,
    allow_flagging="never",  # flagging disabled to save resources
)
# Script entry point: start the web app with request queuing enabled so
# concurrent users are handled gracefully on limited (free CPU) hardware.
if __name__ == "__main__":
    # queue() returns the app object, so launch() can be chained directly.
    interface.queue().launch()