Green_pegion / app.py
abby4's picture
Update app.py
61c2aa1 verified
raw
history blame
1.58 kB
import gradio as gr
from fastai.vision.all import *
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import sounddevice as sd
import soundfile as sf
# Load the exported fastai learner once at import time so every request reuses it.
# NOTE(review): load_learner presumably unpickles 'model.pkl' -- only ship a trusted file.
learn = load_learner('model.pkl')
# Class labels taken from the learner's DataLoaders vocab; predict() pairs
# labels[i] with probs[i], so the order here must match the model's outputs.
labels = learn.dls.vocab
def record_audio(duration=3, sr=44100, channels=1):
    """Record a clip with sounddevice and return it once capture has finished.

    Args:
        duration: Length of the recording in seconds.
        sr: Sampling rate in Hz.
        channels: Number of channels to capture.

    Returns:
        A ``(audio, sr)`` tuple: the float32 buffer produced by ``sd.rec``
        and the sampling rate that was requested.
    """
    print("Recording...")
    frame_count = int(duration * sr)
    recording = sd.rec(
        frame_count,
        samplerate=sr,
        channels=channels,
        dtype='float32',
    )
    # Block until the recording started by sd.rec() has finished.
    sd.wait()
    print("Recording stopped.")
    return recording, sr
def audio_to_spectrogram(audio, sr):
    """Render a mel spectrogram of *audio* to a PNG file and return its path.

    Args:
        audio: Audio samples as an array, either 1-D (mono) or 2-D shaped
            ``(frames, channels)``. Only the first channel of a 2-D input
            is used.
        sr: Sampling rate of *audio* in Hz.

    Returns:
        The path of the image that was written, ``"spectrogram.png"``.
        NOTE: the path is fixed, so each call overwrites the previous image.

    Raises:
        ValueError: If *audio* contains no samples.
    """
    # librosa is not imported at module level, so import it here to avoid a
    # NameError; ``librosa.display`` is a submodule and needs its own import.
    import librosa
    import librosa.display

    samples = np.asarray(audio)
    # Mono input is 1-D, where ``audio[:, 0]`` would raise IndexError.
    if samples.ndim > 1:
        samples = samples[:, 0]
    if samples.size == 0:
        raise ValueError("audio contains no samples")

    mel = librosa.feature.melspectrogram(y=samples, sr=sr, n_mels=128, fmax=8000)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    spectrogram_file = "spectrogram.png"
    fig, ax = plt.subplots()
    try:
        img = librosa.display.specshow(
            mel_db, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax
        )
        fig.colorbar(img, ax=ax, format='%+2.0f dB')
        ax.set(title='Mel-frequency spectrogram')
        # Save this figure explicitly instead of pyplot's implicit
        # "current figure", which is shared global state.
        fig.savefig(spectrogram_file)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return spectrogram_file
def predict(audio):
    """Classify a recorded clip and return a probability for every label.

    The clip is read with soundfile, rendered to a mel-spectrogram image,
    resized to 512x512 and passed to the fastai learner.

    Args:
        audio: Audio source accepted by ``sf.read`` (e.g. a file path), or
            ``None`` when nothing was recorded.

    Returns:
        A dict mapping each entry of ``labels`` to its probability as a float.

    Raises:
        gr.Error: If no audio was supplied.
    """
    if audio is None:
        # Surface a readable message in the UI instead of a crash in sf.read.
        raise gr.Error("No audio received. Please record a clip and try again.")

    # always_2d=True keeps the (frames, channels) layout even for mono files,
    # which audio_to_spectrogram relies on when it selects the first channel.
    audio_data, sr = sf.read(audio, always_2d=True)
    spectrogram_file = audio_to_spectrogram(audio_data, sr)
    img = PILImage.create(spectrogram_file).resize((512, 512))
    # Only the probability vector is needed; the decoded class and its index are not.
    _, _, probs = learn.predict(img)
    return {label: float(probs[i]) for i, label in enumerate(labels)}
# Build the Gradio interface around predict() and start the server.
examples = [['example_audio.mp3']]
gr.Interface(
    fn=predict,
    # type="filepath" hands predict() a path that soundfile can open;
    # "file" is not one of the Audio types documented alongside `sources=`.
    inputs=gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Record audio (WAV)",
    ),
    outputs=gr.components.Label(num_top_classes=3),
    examples=examples,
).launch()