Spaces:
Running
Running
import gradio as gr | |
from fastai.vision.all import * | |
import librosa | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from pydub import AudioSegment | |
import tempfile | |
import PIL | |
# Load the exported fastai learner once, at import time, so that every
# request handled by the app reuses the same in-memory model.
learn = load_learner("model.pkl")

# Class vocabulary of the learner's data loaders; `predict` indexes it in
# parallel with the probability vector returned by the model.
labels = learn.dls.vocab
def audio_to_spectrogram(audio_file):
    """Render the mel spectrogram of an audio clip to a PNG image.

    Args:
        audio_file: Path to the audio file (or any other object accepted by
            ``librosa.load``). String paths ending in ``.mp3`` (any case) are
            first converted to a temporary WAV file with pydub.

    Returns:
        The path of the written image, always ``"spectrogram.png"`` in the
        current working directory. The path is fixed, so each call
        overwrites the image produced by the previous one.
    """
    # Must be the first statement: a function-level ``import librosa.display``
    # binds ``librosa`` as a local name for the whole function. The explicit
    # import is needed because some librosa releases do not expose the
    # ``display`` submodule after a plain ``import librosa``.
    import librosa.display

    is_mp3 = isinstance(audio_file, str) and audio_file.lower().endswith('.mp3')
    if is_mp3:
        # Decode the MP3 through pydub and hand librosa a WAV file; the
        # temporary file is removed when the ``with`` block exits.
        with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
            audio = AudioSegment.from_mp3(audio_file)
            audio.export(temp_wav.name, format='wav')
            y, sr = librosa.load(temp_wav.name, sr=None)
    else:
        # sr=None keeps the file's native sampling rate.
        y, sr = librosa.load(audio_file, sr=None)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    # Convert power to decibels relative to the loudest bin.
    S_dB = librosa.power_to_db(S, ref=np.max)

    spectrogram_file = "spectrogram.png"
    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(
            S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax
        )
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set(title='Mel-frequency spectrogram')
        # Save through the figure object rather than pyplot's implicit
        # "current figure" so the right figure is always written.
        fig.savefig(spectrogram_file)
    finally:
        # Always release the figure, even if rendering fails; otherwise a
        # long-running server accumulates open figures.
        plt.close(fig)
    return spectrogram_file
def predict(audio):
    """Classify an audio clip and return the probability of every label.

    Args:
        audio: Path of the uploaded or recorded audio file, or ``None`` when
            no audio is available.

    Returns:
        A dict mapping each label in ``labels`` to its predicted probability
        as a float, or ``None`` when ``audio`` is ``None``.
    """
    # Gradio can invoke the callback with None (for example when the audio
    # input is cleared while the interface is live); there is nothing to
    # classify in that case, so return an empty result instead of crashing.
    if audio is None:
        return None

    spectrogram_file = audio_to_spectrogram(audio)
    img = PILImage.create(spectrogram_file)
    img = img.resize((512, 512))
    # Only the probability vector is needed; the decoded prediction and its
    # index are discarded.
    _, _, probs = learn.predict(img)
    return {label: float(probs[i]) for i, label in enumerate(labels)}
# Build the web UI: a single audio input (file upload or microphone) wired to
# `predict`, with the three most probable classes shown in a label widget.
# live=True makes Gradio run the prediction whenever the input changes,
# without a submit button.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="Upload or Record audio (WAV or MP3)",
        ),
    ],
    outputs=gr.components.Label(num_top_classes=3),
    live=True,
    title="GREEN PIGEON CLASSIFIER",
    description="Upload or record an audio file and then wait some minutes for the output...",
)
demo.launch()