import gradio as gr
from fastai.vision.all import *
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import tempfile
import sounddevice as sd
import soundfile as sf
# Load the trained model and read the class labels from its vocab
learn = load_learner('model.pkl')
labels = learn.dls.vocab
def record_audio(duration=3, sr=44100, channels=1):
    """Record audio from the default input device (local helper; not called by the Gradio interface)."""
    print("Recording...")
    audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
    sd.wait()
    print("Recording stopped.")
    return audio, sr
def audio_to_spectrogram(audio, sr):
    # Use the first channel if the audio is multi-channel; mono files load as 1-D arrays
    y = audio[:, 0] if audio.ndim > 1 else audio
    # Compute a mel spectrogram and convert the power values to decibels
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)
    fig, ax = plt.subplots()
    img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set(title='Mel-frequency spectrogram')
    spectrogram_file = "spectrogram.png"
    plt.savefig(spectrogram_file)
    plt.close(fig)
    return spectrogram_file
def predict(audio):
    # Gradio passes the recording as a file path; read it into a NumPy array
    audio_data, sr = sf.read(audio)
    # Render the audio as a mel spectrogram image and classify it with the image model
    spectrogram_file = audio_to_spectrogram(audio_data, sr)
    img = PILImage.create(spectrogram_file)
    img = img.resize((512, 512))
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
# Launch the interface
examples = [['example_audio.mp3']]
gr.Interface(
    fn=predict,
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Record audio (WAV)"),
    outputs=gr.Label(num_top_classes=3),
    examples=examples,
).launch()