Spaces:
Running
Running
File size: 1,288 Bytes
fe8fc6e 9346450 fe8fc6e 9346450 fe8fc6e 9346450 fe8fc6e 9346450 fe8fc6e 9346450 fe8fc6e 012dff0 fe8fc6e 012dff0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import gradio as gr
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import soundfile as sf
# Fetch the pretrained Whisper "base" checkpoint once at import time so every
# request reuses the same processor (feature extractor + tokenizer) and model.
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")
# Run on the GPU when one is visible to torch, otherwise stay on the CPU.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")
def _to_mono_at_rate(audio, sampling_rate, target_rate):
    """Down-mix *audio* to one channel and resample it to *target_rate* Hz."""
    # soundfile yields a (frames, channels) array for multi-channel files;
    # average the channels so the feature extractor receives a 1-D signal.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    if sampling_rate != target_rate:
        # Imported lazily: audio already at the target rate needs neither
        # module, so the app keeps working even where scipy is unavailable.
        from math import gcd

        from scipy.signal import resample_poly

        divisor = gcd(int(sampling_rate), int(target_rate))
        audio = resample_poly(
            audio, int(target_rate) // divisor, int(sampling_rate) // divisor
        )
    return audio


def transcribe(audio_path):
    """Transcribe the audio file at *audio_path* with the module-level Whisper model.

    The file is read with ``soundfile``, down-mixed to mono and resampled to
    the sampling rate the processor's feature extractor is configured for
    before being passed to ``model.generate``.

    Args:
        audio_path: Path to an audio file readable by ``soundfile``. ``None``
            or an empty string is treated as "nothing was provided".

    Returns:
        The decoded transcription text, or a string of the form
        ``"Error: <message>"`` when the input is missing/empty or any
        ``Exception`` is raised while reading or transcribing.
    """
    if not audio_path:
        return "Error: No audio provided."
    try:
        audio, sampling_rate = sf.read(audio_path, dtype="float32")
        if audio.size == 0:
            return "Error: Audio file contains no samples."

        # The feature extractor rejects input whose rate differs from its own,
        # so convert the signal instead of forwarding the file's native rate.
        target_rate = processor.feature_extractor.sampling_rate
        audio = _to_mono_at_rate(audio, sampling_rate, target_rate)

        features = processor(
            audio, sampling_rate=target_rate, return_tensors="pt"
        ).input_features
        # Keep the input on the same device as the model weights.
        features = features.to(model.device)

        # NOTE(review): a single generate() call on one feature window; long
        # recordings are presumably truncated by the processor — confirm.
        predicted_ids = model.generate(features)
        return processor.decode(predicted_ids[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: report the failure as text rather than raising.
        return f"Error: {e}"
# Web UI: a single audio input (handed to `transcribe` as a file path) and a
# plain-text output showing the transcription or the error message.
iface = gr.Interface(
    title="Whisper Transcription",
    description="Upload an audio file and get the transcription using Whisper model.",
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|