import gradio as gr
from fastai.vision.all import *
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
import sounddevice as sd
import soundfile as sf
# Load your trained model and define labels
learn = load_learner('model.pkl')
labels = learn.dls.vocab
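# Note: the model is assumed to have been trained on mel-spectrogram images, which is
# why predict() below renders incoming audio to a 512x512 spectrogram before inference.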
def record_audio(duration=3, sr=44100, channels=1):
    """Record audio from the default input device (not called by the Gradio
    interface below, which receives the recording as a file path)."""
    print("Recording...")
    audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
    sd.wait()
    print("Recording stopped.")
    return audio, sr
def audio_to_spectrogram(audio, sr):
    """Render the audio as a mel spectrogram image and return the saved file path."""
    # soundfile returns a 1-D array for mono audio and a 2-D array for multi-channel;
    # use the first channel in either case.
    y = audio[:, 0] if audio.ndim > 1 else audio
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)
    fig, ax = plt.subplots()
    img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')
    ax.set(title='Mel-frequency spectrogram')
    spectrogram_file = "spectrogram.png"
    fig.savefig(spectrogram_file)
    plt.close(fig)
    return spectrogram_file
def predict(audio):
    """Read the recorded audio file, convert it to a spectrogram, and classify it."""
    audio_data, sr = sf.read(audio)
    spectrogram_file = audio_to_spectrogram(audio_data, sr)
    img = PILImage.create(spectrogram_file)
    img = img.resize((512, 512))
    pred, pred_idx, probs = learn.predict(img)
    return {labels[i]: float(probs[i]) for i in range(len(labels))}
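# Quick local sanity check (optional): assumes an 'example_audio.mp3' file sits next to
# this script, as referenced in the examples list below.
# print(predict('example_audio.mp3'))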
# Launch the interface
examples = [['example_audio.mp3']]
gr.Interface(
    fn=predict,
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Record audio (WAV)"),
    outputs=gr.Label(num_top_classes=3),
    examples=examples,
).launch()