Spaces:

abby4
/

Green_pegion

Running

App Files Files Community

abby4 committed on Apr 23

Commit

a088675

•

1 Parent(s): 3a213e0

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -18

app.py CHANGED Viewed

@@ -1,32 +1,24 @@
 import gradio as gr
 from fastai.vision.all import *
-import librosa
 import numpy as np
 import matplotlib.pyplot as plt
-from pydub import AudioSegment
 import tempfile
 learn = load_learner('model.pkl')
 labels = learn.dls.vocab
 def record_audio(duration=3, sr=44100, channels=1):
     print("Recording...")
     audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
     sd.wait()
     print("Recording stopped.")
     return audio, sr
-def audio_to_spectrogram(audio_file,):
-    if audio_file.endswith('.mp3'):
-        with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
-            audio = AudioSegment.from_mp3(audio_file)
-            audio.export(temp_wav.name, format='wav')
-            y, sr = librosa.load(temp_wav.name, sr=None)
-    else:
-        y, sr = librosa.load(audio_file, sr=None)
-    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
     S_dB = librosa.power_to_db(S, ref=np.max)
     fig, ax = plt.subplots()
     img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
@@ -38,17 +30,18 @@ def audio_to_spectrogram(audio_file,):
     return spectrogram_file
 def predict(audio):
-    spectrogram_file = audio_to_spectrogram(audio)
     img = PILImage.create(spectrogram_file)
     img = img.resize((512, 512))
     pred, pred_idx, probs = learn.predict(img)
     return {labels[i]: float(probs[i]) for i in range(len(labels))}
-examples = ['example_audio.mp3']
 gr.Interface(
     fn=predict,
-    inputs=gr.Audio(sources="microphone", type="filepath", label="Upload audio (WAV or MP3)"),
     outputs=gr.components.Label(num_top_classes=3),
     examples=examples,
-).launch()

 import gradio as gr
 from fastai.vision.all import *
 import numpy as np
 import matplotlib.pyplot as plt
 import tempfile
+import sounddevice as sd
+import soundfile as sf
+# Load your trained model and define labels
 learn = load_learner('model.pkl')
 labels = learn.dls.vocab
 def record_audio(duration=3, sr=44100, channels=1):
     print("Recording...")
     audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
     sd.wait()
     print("Recording stopped.")
     return audio, sr
+def audio_to_spectrogram(audio, sr):
+    S = librosa.feature.melspectrogram(y=audio[:, 0], sr=sr, n_mels=128, fmax=8000)
     S_dB = librosa.power_to_db(S, ref=np.max)
     fig, ax = plt.subplots()
     img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
     return spectrogram_file
 def predict(audio):
+    audio_data, sr = sf.read(audio)
+    spectrogram_file = audio_to_spectrogram(audio_data, sr)
     img = PILImage.create(spectrogram_file)
     img = img.resize((512, 512))
     pred, pred_idx, probs = learn.predict(img)
     return {labels[i]: float(probs[i]) for i in range(len(labels))}
+# Launch the interface
+examples = [['example_audio.mp3']]
 gr.Interface(
     fn=predict,
+    inputs=gr.Audio(sources="microphone", type="file", label="Record audio (WAV)"),
     outputs=gr.components.Label(num_top_classes=3),
     examples=examples,
+).launch()