"""Extract YAMNet audio embeddings from in-memory audio data.

Importing this module loads the YAMNet SavedModel from the local
``yamnet_saved_model`` directory, so the import itself performs disk I/O.
"""

import io
import pickle

import librosa
import numpy as np
import tensorflow as tf

# Sample rate the audio is resampled to before it is fed to YAMNet.
YAMNET_SAMPLE_RATE = 16000

# Load the YAMNet model from the SavedModel format (done once, at import time).
yamnet_model = tf.saved_model.load('yamnet_saved_model')


def extract_audio_embeddings(audio_binary: bytes) -> list:
    """Return YAMNet embeddings for an encoded audio file held in memory.

    Args:
        audio_binary: Raw bytes of an audio file (the file's contents, NOT a
            filesystem path). The bytes are wrapped in ``io.BytesIO`` and
            decoded by ``librosa.load``.

    Returns:
        The model's embeddings output converted to nested Python lists via
        ``.numpy().tolist()``.

    Raises:
        TypeError: If ``audio_binary`` is not a bytes-like object (for example
            a ``str`` path), because ``io.BytesIO`` rejects it.
    """
    # NOTE(review): decoding compressed formats such as MP3 from a file-like
    # object depends on the audio backend librosa has available -- confirm in
    # the deployment environment.
    audio, _sample_rate = librosa.load(
        io.BytesIO(audio_binary),
        sr=YAMNET_SAMPLE_RATE,  # YAMNet requires a sample rate of 16kHz
    )

    # Convert audio to a float32 tensor before calling the model.
    audio_tensor = tf.convert_to_tensor(audio, dtype=tf.float32)

    # The model returns (scores, embeddings, spectrogram); only the
    # embeddings are needed here.
    _, embeddings, _ = yamnet_model(audio_tensor)

    # Convert embeddings to a list of lists so callers get plain Python data.
    return embeddings.numpy().tolist()


# Example usage
if __name__ == "__main__":
    image_audio_path = "pictures/users/1a.mp3"

    # extract_audio_embeddings expects the file's bytes, not its path, so
    # read the file in binary mode first.
    with open(image_audio_path, "rb") as audio_file:
        image_audio_bytes = audio_file.read()

    # Extract embeddings from image audio file
    image_audio_embeddings = extract_audio_embeddings(image_audio_bytes)
    print("Embeddings for", image_audio_path)
    print(image_audio_embeddings)

# Emitted once the module (and therefore the model load above) has finished.
print("audio embedding model loaded succesfully")