Enutrof committed on
Commit
8f5986e
1 Parent(s): 796753a
Files changed (2) hide show
  1. app.py +2 -2
  2. inference.py +11 -38
app.py CHANGED
@@ -2,6 +2,6 @@ import gradio as gr
2
  from inference import *
3
 
4
  iface = gr.Interface(fn=inference,
5
- inputs='audio', #gr.inputs.Audio(source="upload", type="filepath"),
6
  outputs="text")
7
- iface.launch(share=True)
 
2
  from inference import *
3
 
4
  iface = gr.Interface(fn=inference,
5
+ inputs=gr.inputs.Audio(source="upload", type="filepath"),
6
  outputs="text")
7
+ iface.launch()
inference.py CHANGED
@@ -1,44 +1,17 @@
1
- import math, librosa
2
  import numpy as np
 
3
 
4
  from tensorflow import keras
5
 
6
- SAMPLE_RATE = 22050
7
- def extract_mfcc_batch(file_path, n_mfcc=13, n_fft=1024, hop_length=512, length_segment=10):
8
- """
9
- Extract and return an mfcc batch
10
- MFCC - Mel Frequency Cepstrum Coefficients
11
- """
12
- mfcc_batch = []
13
- num_samples_per_segment = 220500 #length_segment * SAMPLE_RATE
14
-
15
- sr, signal = file_path#librosa.load(file_path, sr=SAMPLE_RATE)
16
- signal = signal.astype(np.float64)
17
-
18
- duration = librosa.get_duration(y=signal, sr=sr) #30 seconds
19
- print(duration)
20
- num_segments = int(duration/length_segment) #3
21
- # process segments, extracting mfccs and storing data
22
- for s in range(num_segments+1):
23
- start_sample = num_samples_per_segment * s
24
- finish_sample = start_sample + num_samples_per_segment
25
- try:
26
- mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
27
- sr=SAMPLE_RATE,
28
- n_fft=n_fft,
29
- n_mfcc=n_mfcc,
30
- hop_length=hop_length
31
- )
32
- #(13, 431)
33
- mfcc = mfcc.T # A transpose
34
- # store mfcc for segment if it has the expected length
35
- if len(mfcc) == 431:
36
- mfcc_batch.append(mfcc.tolist())
37
-
38
- except Exception as e:
39
- print(e)
40
- continue
41
- return mfcc_batch
42
 
43
  def inference(filename, model_path='gtzan10_lstm_0.7179_l_1.12.h5'):
44
  model = keras.models.load_model(model_path)
@@ -52,7 +25,7 @@ def inference(filename, model_path='gtzan10_lstm_0.7179_l_1.12.h5'):
52
  'pop',
53
  'reggae',
54
  'rock']
55
- mfcc = extract_mfcc_batch(filename)
56
  pred = model.predict(mfcc)
57
  genre = [mapping[i] for i in np.argmax(pred, axis=1)]
58
 
 
 
1
  import numpy as np
2
+ import requests
3
 
4
  from tensorflow import keras
5
 
6
def get_mfccs(filename):
    """Fetch the MFCC batch for an audio file from the remote MFCC service.

    The file is uploaded as the ``audio`` form field to the ``mfcc_batch``
    endpoint, and the ``mfccs`` entry of the JSON reply is converted to a
    NumPy array.

    Args:
        filename: Path of the audio file to upload.

    Returns:
        numpy.ndarray built from the ``mfccs`` list in the service reply.

    Raises:
        OSError: If ``filename`` cannot be opened for reading.
        requests.HTTPError: If the service answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # The context manager closes the upload handle even when the request
    # fails; the original left the file open for the life of the process.
    with open(filename, 'rb') as audio_file:
        reply = requests.post(
            "https://librosa-utils.herokuapp.com/mfcc_batch",
            files={'audio': audio_file},
            # Without a timeout an unresponsive service blocks forever.
            timeout=120,
        )
    # Fail loudly on an error status instead of parsing an error page as JSON.
    reply.raise_for_status()
    mfccs = np.array(reply.json()['mfccs'])
    print(mfccs.shape)
    return mfccs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def inference(filename, model_path='gtzan10_lstm_0.7179_l_1.12.h5'):
17
  model = keras.models.load_model(model_path)
 
25
  'pop',
26
  'reggae',
27
  'rock']
28
+ mfcc = get_mfccs(filename)
29
  pred = model.predict(mfcc)
30
  genre = [mapping[i] for i in np.argmax(pred, axis=1)]
31