Vira21 committed on
Commit
c10cdf4
1 Parent(s): f16c081

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -1,21 +1,17 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
4
- import spaces
5
 
6
  # Load the Whisper model pipeline for speech recognition with optimizations
7
  model_name = "Vira21/Whisper-Base-KhmerV2"
8
  whisper_pipeline = pipeline(
9
  "automatic-speech-recognition",
10
  model=model_name,
11
- device="cuda" if torch.cuda.is_available() else "CPU"
12
  )
13
 
14
  def transcribe_audio(audio):
15
  try:
16
- if audio is None:
17
- return "No audio provided. Please upload an audio file or record your voice."
18
-
19
  # Process and transcribe the audio
20
  result = whisper_pipeline(audio)["text"]
21
  return result
@@ -28,12 +24,13 @@ interface = gr.Interface(
28
  fn=transcribe_audio,
29
  inputs=gr.Audio(type="filepath"),
30
  outputs="text",
31
- title="OpenAI Whisper Small KHMER-ENGLISH Speech-to-Text",
32
- description="Upload an audio file or record your voice to get the transcription.",
33
- examples=[["Example Audio/126.wav"], ["Example Audio/tomholland28282.wav"]],
34
  allow_flagging="never" # Disables flagging to save resources
35
  )
36
 
37
  # Launch the app with queue enabled for better handling on free CPU
38
  if __name__ == "__main__":
39
- interface.launch()
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import torch
 
4
 
5
  # Load the Whisper model pipeline for speech recognition with optimizations
6
  model_name = "Vira21/Whisper-Base-KhmerV2"
7
  whisper_pipeline = pipeline(
8
  "automatic-speech-recognition",
9
  model=model_name,
10
+ device=0 if torch.cuda.is_available() else -1 # Use GPU if available, otherwise use CPU
11
  )
12
 
13
  def transcribe_audio(audio):
14
  try:
 
 
 
15
  # Process and transcribe the audio
16
  result = whisper_pipeline(audio)["text"]
17
  return result
 
24
  fn=transcribe_audio,
25
  inputs=gr.Audio(type="filepath"),
26
  outputs="text",
27
+ title="Whisper Base Khmer Speech-to-Text",
28
+ description="Upload an audio file or record your voice to get the transcription in Khmer.",
29
+ examples=[["Example Audio/126.wav"], ["Example Audio/tomholland28282.wav"]],
30
  allow_flagging="never" # Disables flagging to save resources
31
  )
32
 
33
  # Launch the app with queue enabled for better handling on free CPU
34
  if __name__ == "__main__":
35
+ interface.queue() # Enable asynchronous queuing for better performance
36
+ interface.launch()