"""Recognize a song by comparing its mel-spectrogram embedding to a database.

The audio clip is rendered to a mel-spectrogram image, embedded with a
pre-trained Keras model, and matched against stored embeddings by Euclidean
distance.
"""
import os
import pickle

# Third-party imports, kept in their existing relative order
# (NOTE(review): some native libraries can be import-order sensitive).
import numpy as np
import tensorflow as tf
from scipy.io.wavfile import write
import keras.backend as K
import librosa.display
import cv2
import librosa
import matplotlib.pyplot as plt
from keras.applications import VGG16
import scipy

# Number of leading samples of the query audio used for matching.
# 220500 samples is 10 s at 22050 Hz, the default rate of ``librosa.load``.
_CLIP_SAMPLES = 220500

# Width/height (pixels) the spectrogram image is resized to before embedding.
_IMG_SIZE = (150, 150)


# Convert the song to a mel spectrogram, since the siamese network does not
# work on sound directly.
def create_spectrogram(clip, sample_rate, save_path):
    """Render the mel spectrogram of an audio clip and save it as an image.

    Args:
        clip: Audio samples passed to ``librosa.feature.melspectrogram``.
        sample_rate: Sampling rate of ``clip``.
        save_path: Destination path for the rendered image.
    """
    plt.interactive(False)
    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        mel = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        librosa.display.specshow(librosa.power_to_db(mel, ref=np.max))
        fig.savefig(save_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even when rendering or saving fails,
        # and only this figure: figures owned by the caller are left alone.
        fig.clf()
        plt.close(fig)


def load_img(path):
    """Load an image as RGB and resize it to 150x150.

    Args:
        path: Path of the image file to read.

    Returns:
        The resized RGB image as returned by ``cv2.resize``.

    Raises:
        OSError: If the image cannot be read from ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image file: {path!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, _IMG_SIZE)


def _split_artist_title(song_key):
    """Split an ``artist-title.ext`` key into ``(artist, title)``."""
    # Split on the first hyphen only, so titles that contain hyphens
    # themselves no longer break the two-way unpacking.
    artist, separator, remainder = song_key.partition('-')
    if not separator:
        # No hyphen at all: treat the whole key as the title.
        artist, remainder = '', song_key
    return artist, os.path.splitext(remainder)[0]


def _closest_song(query_embedding, song_embeddings):
    """Return the key whose stored embeddings lie nearest to the query."""
    nearest_by_song = {
        key: min(np.linalg.norm(query_embedding - emb) for emb in embeddings)
        for key, embeddings in song_embeddings.items()
        if len(embeddings) > 0  # skip songs with no stored embeddings
    }
    if not nearest_by_song:
        raise ValueError("Song database contains no embeddings to compare against")
    # min() returns the first minimal key, i.e. ties resolve to the earliest
    # entry in the database's iteration order.
    return min(nearest_by_song, key=nearest_by_song.get)


def main_loop(audio_path="my_audio.wav", database_path='dict.pickle',
              model_path='./embdmodel_1.hdf5', spectrogram_path='test.png'):
    """Identify the song in an audio file and return its title.

    Args:
        audio_path: Audio file containing the song to recognize.
        database_path: Pickle file mapping song file names to an iterable of
            embeddings (presumably produced by the same model -- verify
            against the code that builds the database).
        model_path: Saved Keras model; its layer at index 2 is used to compute
            embeddings (presumably the embedding sub-network -- TODO confirm).
        spectrogram_path: Where the intermediate spectrogram image is written.

    Returns:
        The title part of the best-matching database key, without extension.

    Raises:
        ValueError: If the database holds no embeddings.
        OSError: If the spectrogram image cannot be read back.
    """
    # SECURITY: pickle can execute arbitrary code on load; only use a
    # database file from a trusted source.
    with open(database_path, 'rb') as handle:
        songspecdict = pickle.load(handle)

    # Load the song to match
    song, sr = librosa.load(audio_path)
    to_match = np.copy(song[0:_CLIP_SAMPLES])
    print("Loaded data into librosa...")

    # Create spectrogram image of the song to match
    create_spectrogram(to_match, sr, spectrogram_path)
    print("Created spectogram...")

    # Load the spectrogram image back and add a leading batch axis
    to_match_img = np.expand_dims(load_img(spectrogram_path), axis=0)
    print("Loaded spectrum image...")

    # Load the tune recognition model and embed the song to match
    model = tf.keras.models.load_model(model_path)
    embedding_model = model.layers[2]
    to_match_emb = embedding_model.predict(to_match_img)
    print("Get song embedding...")

    # Find the database song closest to the song to match
    best_key = _closest_song(to_match_emb, songspecdict)

    # Get the title and artist of the recognized song
    recognized_song_artist, recognized_song_title = _split_artist_title(best_key)
    print(f'Artist: {recognized_song_artist}')
    print(f'Title: {recognized_song_title}')
    return recognized_song_title