# Source captured from a Hugging Face Space page (Space status at capture time: "Build error").
import gradio as gr | |
import warnings | |
import torch | |
from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor | |
import soundfile as sf | |
# Silence library warnings so the app logs stay readable.
# NOTE: this suppresses *all* warnings, deprecation notices included.
warnings.filterwarnings("ignore")

# Hub id of the checkpoint shared by the tokenizer, model and processor.
MODEL_ID = "NbAiLabBeta/nb-whisper-medium"

# Load tokenizer, model and processor
tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
processor = WhisperProcessor.from_pretrained(MODEL_ID)

# Set up the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float32  # weights are kept in full precision

# Keep the model on the selected device. Inference inputs that are sent to
# `device` would otherwise be on a different device than the weights
# whenever CUDA is available, and generate() would fail.
model.to(device)
_TARGET_SAMPLE_RATE = 16000  # sampling rate the Whisper processor is called with
_CHUNK_SECONDS = 28  # window length per generate() call, below Whisper's 30 s input window


def _load_mono_16k(audio_file):
    """Read *audio_file* and return mono float32 samples at 16 kHz."""
    samples, sample_rate = sf.read(audio_file, dtype="float32")
    if samples.ndim > 1:
        # soundfile returns (frames, channels) for multi-channel files;
        # the processor expects a 1-D waveform, so average down to mono.
        samples = samples.mean(axis=1)
    if sample_rate != _TARGET_SAMPLE_RATE and samples.size:
        # Imported lazily: only needed when the upload is not already 16 kHz.
        from math import gcd

        from scipy.signal import resample_poly

        divisor = gcd(_TARGET_SAMPLE_RATE, sample_rate)
        samples = resample_poly(
            samples,
            _TARGET_SAMPLE_RATE // divisor,
            sample_rate // divisor,
        ).astype("float32", copy=False)
    return samples


def transcribe_audio(audio_file):
    """Transcribe an audio file to Norwegian text with the Whisper model.

    The audio is converted to mono 16 kHz, split into consecutive
    28-second windows, and each window is decoded with beam search.
    The per-window texts are stripped and joined with single spaces.

    Args:
        audio_file: Path to an audio file readable by ``soundfile``.
            ``None`` or an empty path (nothing uploaded) is accepted.

    Returns:
        The transcription as a string; ``""`` when there is no input
        or the file contains no samples.
    """
    if not audio_file:
        # Gradio passes None when the user submits without an upload.
        return ""

    samples = _load_mono_16k(audio_file)
    chunk_size = _CHUNK_SECONDS * _TARGET_SAMPLE_RATE

    pieces = []
    with torch.no_grad():
        for start in range(0, len(samples), chunk_size):
            features = processor(
                samples[start:start + chunk_size],
                sampling_rate=_TARGET_SAMPLE_RATE,
                return_tensors="pt",
            ).input_features.to(model.device)  # follow the model's device
            # `chunk_length_s` is a pipeline option, not a generate()
            # argument, so chunking is done by the loop above instead.
            token_ids = model.generate(
                features,
                max_length=448,
                num_beams=5,
                task="transcribe",
                language="no",
            )
            text = processor.batch_decode(token_ids, skip_special_tokens=True)[0]
            pieces.append(text.strip())

    return " ".join(piece for piece in pieces if piece)
# HTML for banner image.
# The image is addressed through `/resolve/`, which serves the file contents
# (`/raw/` can return a Git LFS pointer instead of the image), and the sizing
# lives in a `style` attribute rather than inside `width`.
banner_html = """
<div style="text-align: center;">
<img src="https://huggingface.co/spaces/camparchimedes/work_harder/resolve/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
</div>
"""

# Create Gradio interface: a Blocks page with the banner on top and the
# transcription Interface rendered beneath it.
iface = gr.Blocks()
with iface:
    gr.HTML(banner_html)
    # `layout` is not an accepted gr.Interface parameter in current Gradio
    # releases, so it is no longer passed; components already stack
    # vertically inside Blocks.
    gr.Interface(
        fn=transcribe_audio,
        inputs=gr.Audio(type="filepath"),
        outputs="text",
        title="Audio Transcription App",
        description="Upload an audio file to get the transcription",
        theme="default",
        live=False,
    )

# Launch the interface
iface.launch(share=True, debug=True)