"""Gradio demo that transcribes recorded/uploaded audio with openai/whisper-base."""

import gradio as gr
import numpy as np
import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Whisper checkpoints are trained on 16 kHz mono audio; all input is resampled to this.
TARGET_SAMPLE_RATE = 16_000

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to(device)


def _prepare_audio(sample_rate: int, samples: np.ndarray) -> np.ndarray:
    """Convert raw Gradio audio (rate, int/float array) to mono float32 at 16 kHz.

    Gradio's numpy audio type typically delivers int16 PCM, possibly stereo
    (shape (n, channels)), at the recording device's native rate.
    """
    if np.issubdtype(samples.dtype, np.integer):
        # Normalise integer PCM to [-1, 1] floats, which the processor expects.
        samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)  # downmix stereo -> mono
    if sample_rate != TARGET_SAMPLE_RATE:
        # Linear-interpolation resample; adequate for speech transcription
        # and avoids pulling in librosa/torchaudio.
        duration = samples.shape[0] / sample_rate
        n_target = int(round(duration * TARGET_SAMPLE_RATE))
        src_t = np.linspace(0.0, duration, num=samples.shape[0], endpoint=False)
        dst_t = np.linspace(0.0, duration, num=n_target, endpoint=False)
        samples = np.interp(dst_t, src_t, samples).astype(np.float32)
    return samples


def transcribe(audio):
    """Transcribe a Gradio audio payload ((sample_rate, samples) tuple) to text.

    Returns the transcription string, or an "Error: ..." message on failure
    (kept as a string so the text output component always renders something).
    """
    if audio is None:  # user pressed submit without recording/uploading
        return "Error: no audio provided."
    try:
        sample_rate, samples = audio
        waveform = _prepare_audio(sample_rate, np.asarray(samples))
        inputs = processor(waveform, sampling_rate=TARGET_SAMPLE_RATE, return_tensors="pt")
        features = inputs.input_features.to(model.device)
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            predicted_ids = model.generate(features)
        return processor.decode(predicted_ids[0], skip_special_tokens=True)
    except Exception as e:
        return f"Error: {str(e)}"


# NOTE(review): original used the removed Gradio 2.x/3.x `gr.inputs.Audio(source=...)`
# namespace; `gr.Audio(sources=[...])` is the current (4.x) API — confirm against the
# pinned Gradio version. `type="numpy"` fixes the original bug of feeding a file *path*
# string to WhisperProcessor, which expects a waveform array.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="numpy"),
    outputs="text",
    title="Whisper Transcription",
    description="Upload an audio file and get the transcription using Whisper model.",
)

if __name__ == "__main__":
    iface.launch()