# nb / app.py
# Hugging Face file-view header: uploaded by camparchimedes,
# commit 592f7e1 ("Update app.py", verified), 2.26 kB.
# (Page links in the original view: raw, history, blame.)
import gradio as gr
import warnings
import torch
from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
import soundfile as sf
warnings.filterwarnings("ignore")
# Checkpoint shared by the tokenizer, the model and the processor.
MODEL_ID = "NbAiLabBeta/nb-whisper-medium"

# Set up the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch_dtype = torch.float32

# Load tokenizer, model and processor
tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)
model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
processor = WhisperProcessor.from_pretrained(MODEL_ID)

# Place the weights on the selected device with the declared dtype; without
# this the model stays on the CPU while the inputs are sent to `device`.
model.to(device=device, dtype=torch_dtype)
# Initialize pipeline
#asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
#def transcribe_audio(audio_file):
#with torch.no_grad():
#output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
#return output["text"]
def transcribe_audio(audio_file):
    """Transcribe an audio file to Norwegian text with the loaded Whisper model.

    The file is read with ``soundfile``, mixed down to mono, resampled to
    16 kHz when needed, and transcribed in consecutive 28-second windows whose
    texts are joined with single spaces.

    Args:
        audio_file: Path to an audio file readable by ``soundfile``. A falsy
            value (``None`` or ``""``) means no audio was supplied.

    Returns:
        The transcription as one string, or ``""`` when no audio was supplied
        or the file holds no samples.
    """
    from math import gcd

    target_rate = 16000  # sampling rate handed to the processor
    chunk_seconds = 28  # window length the original `chunk_length_s` asked for

    if not audio_file:
        return ""

    # always_2d yields (frames, channels), so mono and multi-channel files
    # share one code path; averaging the channels gives a mono waveform.
    samples, source_rate = sf.read(audio_file, dtype="float32", always_2d=True)
    waveform = samples.mean(axis=1)
    if waveform.size == 0:
        return ""

    if source_rate != target_rate:
        # Imported lazily: only needed when the file is not already 16 kHz.
        from scipy.signal import resample_poly

        divisor = gcd(target_rate, int(source_rate))
        waveform = resample_poly(
            waveform, target_rate // divisor, int(source_rate) // divisor
        )

    chunk_size = chunk_seconds * target_rate
    pieces = []
    with torch.no_grad():
        for start in range(0, len(waveform), chunk_size):
            window = waveform[start:start + chunk_size]
            features = processor(
                window, sampling_rate=target_rate, return_tensors="pt"
            ).input_features
            # Follow the model's own placement so inputs and weights always
            # agree, wherever the model was put.
            features = features.to(device=model.device, dtype=model.dtype)
            # `chunk_length_s` is a pipeline option, not a generate() argument,
            # so the chunking is done by the loop above instead.
            token_ids = model.generate(
                features,
                max_length=448,
                num_beams=5,
                task="transcribe",
                language="no",
            )
            text = processor.batch_decode(token_ids, skip_special_tokens=True)[0]
            text = text.strip()
            if text:
                pieces.append(text)

    return " ".join(pieces)
#print(transcription)
# HTML for the banner image shown above the transcription UI.
# Sizing lives in a `style` attribute: the `width` attribute only takes a
# length or percentage, not CSS declarations.
banner_html = """
<div style="text-align: center;">
<img src="https://huggingface.co/spaces/camparchimedes/work_harder/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
</div>
"""
# Create Gradio interface: a Blocks page with the banner on top and the
# transcription Interface rendered beneath it.
iface = gr.Blocks()
with iface:
    gr.HTML(banner_html)
    # NOTE(review): `layout="vertical"` and `theme="default"` were dropped.
    # `layout` is not a parameter of current gr.Interface, and this Interface
    # is nested inside `iface` — confirm against the pinned Gradio version.
    gr.Interface(
        fn=transcribe_audio,
        inputs=gr.Audio(type="filepath"),
        outputs="text",
        title="Audio Transcription App",
        description="Upload an audio file to get the transcription",
        live=False,
    )

# Launch the interface
iface.launch(share=True, debug=True)