import gradio as gr
from fastai.vision.all import *
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import tempfile
import sounddevice as sd
import soundfile as sf
# Load the trained model and read the class labels from its vocab
learn = load_learner('model.pkl')
labels = learn.dls.vocab
def record_audio(duration=3, sr=44100, channels=1):
    """Record audio from the default input device (local helper; not called by the Gradio interface)."""
    print("Recording...")
    audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
    sd.wait()
    print("Recording stopped.")
    return audio, sr
def audio_to_spectrogram(audio, sr):
    # Use the first channel if the audio is multi-channel; mono files load as 1-D arrays
    y = audio[:, 0] if audio.ndim > 1 else audio
    # Compute a mel spectrogram and convert the power values to decibels
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)
    fig, ax = plt.subplots()
    img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set(title='Mel-frequency spectrogram')
    spectrogram_file = "spectrogram.png"
    plt.savefig(spectrogram_file)
    plt.close(fig)
    return spectrogram_file
def predict(audio):
    # Gradio passes the recording as a file path; read it into a NumPy array
    audio_data, sr = sf.read(audio)
    # Render the audio as a mel spectrogram image and classify it with the image model
    spectrogram_file = audio_to_spectrogram(audio_data, sr)
    img = PILImage.create(spectrogram_file)
    img = img.resize((512, 512))
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
# Launch the interface
examples = [['example_audio.mp3']]
gr.Interface(
    fn=predict,
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Record audio (WAV)"),
    outputs=gr.Label(num_top_classes=3),
    examples=examples,
).launch()