Spaces:

camparchimedes
/

nb

Build error

App Files Files

nb / app.py

camparchimedes

Update app.py

592f7e1 verified 4 months ago

raw

history blame

2.26 kB

	import gradio as gr
	import warnings
	import torch
	from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
	import soundfile as sf

	warnings.filterwarnings("ignore")

	# Hub checkpoint that the tokenizer, model and processor are all loaded from.
	MODEL_ID = "NbAiLabBeta/nb-whisper-medium"

	# Set up the device: use the GPU when one is available, otherwise the CPU.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	torch_dtype = torch.float32

	# Load tokenizer and model
	tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)
	model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
	processor = WhisperProcessor.from_pretrained(MODEL_ID)

	# Put the weights on the selected device and dtype. Inputs are sent to
	# `device` at inference time, so leaving the model on the CPU would make
	# generation fail with a device mismatch on GPU hosts.
	model = model.to(device=device, dtype=torch_dtype)

	# Earlier pipeline-based implementation, kept for reference (currently disabled):
	#asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)

	#def transcribe_audio(audio_file):
	#with torch.no_grad():
	#output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
	#return output["text"]

	def transcribe_audio(audio_file):
	    """Transcribe an audio file to text with the loaded Whisper model.

	    The audio is read with soundfile, mixed down to mono, resampled to the
	    16 kHz rate passed to the processor, and decoded in consecutive
	    28-second windows so recordings longer than one window are not cut off.

	    Args:
	        audio_file: Path to an audio file readable by soundfile, or None
	            when no audio was provided.

	    Returns:
	        The transcription as a single string; window transcripts are
	        stripped and joined with single spaces. Returns "" for missing or
	        empty audio.
	    """
	    target_rate = 16000  # sampling rate handed to the processor
	    chunk_seconds = 28  # window length the original code requested

	    if audio_file is None:
	        return ""

	    waveform, sample_rate = sf.read(audio_file, dtype="float32")

	    # soundfile returns (frames, channels) for multi-channel files; the
	    # feature extractor would misread that as a batch, so average to mono.
	    if waveform.ndim > 1:
	        waveform = waveform.mean(axis=1)

	    # Resample instead of mislabelling the audio as 16 kHz.
	    if sample_rate != target_rate:
	        from math import gcd

	        from scipy.signal import resample_poly

	        divisor = gcd(target_rate, sample_rate)
	        waveform = resample_poly(
	            waveform, target_rate // divisor, sample_rate // divisor
	        )

	    if waveform.size == 0:
	        return ""

	    samples_per_chunk = chunk_seconds * target_rate
	    pieces = []
	    for start in range(0, len(waveform), samples_per_chunk):
	        chunk = waveform[start:start + samples_per_chunk]
	        features = processor(
	            chunk, sampling_rate=target_rate, return_tensors="pt"
	        ).input_features
	        # Follow the model's actual device so inputs and weights always match.
	        features = features.to(model.device)
	        with torch.no_grad():
	            # `chunk_length_s` is a pipeline option, not a generate() argument,
	            # so it is not forwarded here; the loop does the windowing.
	            token_ids = model.generate(
	                features,
	                max_length=448,
	                num_beams=5,
	                task="transcribe",
	                language="no",
	            )
	        text = processor.batch_decode(token_ids, skip_special_tokens=True)[0]
	        pieces.append(text.strip())

	    return " ".join(piece for piece in pieces if piece)

	# HTML for the banner image shown at the top of the page.
	# The size is set through a `style` attribute; CSS declarations are not
	# valid inside the `width` attribute.
	banner_html = """
	<div style="text-align: center;">
	<img src="https://huggingface.co/spaces/camparchimedes/work_harder/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
	</div>
	"""

	# Create Gradio interface: a Blocks page holding the banner and, below it,
	# the transcription form. The theme is set on the outer Blocks container
	# (NOTE(review): a theme on a nested Interface appears to have no effect --
	# confirm against the installed Gradio version).
	iface = gr.Blocks(theme="default")

	with iface:
	    gr.HTML(banner_html)
	    # The former `layout="vertical"` argument is dropped: it is not a
	    # supported Interface parameter in current Gradio releases.
	    gr.Interface(
	        fn=transcribe_audio,
	        inputs=gr.Audio(type="filepath"),
	        outputs="text",
	        title="Audio Transcription App",
	        description="Upload an audio file to get the transcription",
	        live=False,
	    )

	# Launch the interface
	iface.launch(share=True, debug=True)