Spaces:
Runtime error
Runtime error
#import os | |
#import shutil | |
import numpy as np | |
import tensorflow as tf | |
from tensorflow import keras | |
#from pathlib import Path | |
#from IPython.display import display, Audio | |
import numpy as np | |
import tensorflow as tf | |
import gradio as gr | |
from huggingface_hub import from_pretrained_keras | |
#import cv2 | |
#from IPython.display import Audio | |
# Speaker labels, indexed by the class id that the model predicts.
# The spellings are shown to the user as-is, so they are kept verbatim.
classes_names = [
    'Benjamin_Netanyau',
    'Jens_Stoltenberg',
    'Julia_Gillard',
    'Magaret_Tarcher',
    'Nelson_Mandela',
]

# Sampling rate shared by all of the audio samples. Since every sample is
# one second long, this is also the length of a decoded audio wave.
SAMPLING_RATE = 16000

# NOTE: the training-time settings of the original notebook are not used by
# this app and are kept here for reference only:
#   VALID_SPLIT = 0.1   (fraction of samples used for validation)
#   SHUFFLE_SEED = 43   (seed for shuffling the dataset and the noise)
#   SCALE = 0.5         (noisy_sample = sample + noise * prop * scale,
#                        where prop = sample_amplitude / noise_amplitude)
# The noisy test-dataset pipeline (paths_and_labels_to_dataset / add_noise)
# is likewise not rebuilt here.

# Pretrained speaker-recognition model fetched from the Hugging Face Hub.
model = from_pretrained_keras("keras-io/speaker-recognition")
def path_to_audio(path):
    """Read the WAV file at ``path`` and return its decoded samples.

    The file is decoded as a single channel, requesting ``SAMPLING_RATE``
    samples (one second of audio at the sampling rate used by this app).
    The sample rate reported by the decoder is discarded.
    """
    raw_bytes = tf.io.read_file(path)
    waveform, _sample_rate = tf.audio.decode_wav(
        raw_bytes, desired_channels=1, desired_samples=SAMPLING_RATE
    )
    return waveform
def audio_to_fft(audio):
    """Return the magnitude of the positive-frequency half of the FFT.

    ``audio`` is expected to be a rank-3 tensor whose last axis has size 1
    (it is squeezed away before the transform and restored afterwards).
    The result keeps the first ``samples // 2`` bins along axis 1.
    """
    # tf.signal.fft works on the innermost dimension, so drop the trailing
    # channel axis first and put it back once the transform is done.
    squeezed = tf.squeeze(audio, axis=-1)
    as_complex = tf.cast(
        tf.complex(real=squeezed, imag=tf.zeros_like(squeezed)),
        tf.complex64,
    )
    spectrum = tf.expand_dims(tf.signal.fft(as_complex), axis=-1)

    # Only the first half of the FFT (the positive frequencies) is kept.
    half_length = squeezed.shape[1] // 2
    return tf.math.abs(spectrum[:, :half_length, :])
#actual_audio_path = '/content/drive/MyDrive/Downloads/16000_pcm_speeches/audio/Benjamin_Netanyau/260.wav' | |
# print(path_to_audio(actual_audio_path).shape) | |
# print(actual_audio_path.shape) | |
def predict(actual_audio_path, actual_label):
    """Predict the speaker of the audio file at ``actual_audio_path``.

    ``actual_label`` is accepted so the signature matches the app's two
    inputs, but it does not influence the prediction.

    Returns a tuple ``(predicted_speaker_name, actual_audio_path)``; the
    path is passed through unchanged so the app can play the audio back.
    """
    # Decode the file and add a leading batch axis of size one.
    waveform = path_to_audio(actual_audio_path)
    batch = tf.expand_dims(waveform, axis=0)

    # The model is fed the FFT magnitudes of the signal.
    spectra = audio_to_fft(batch)

    # Pick the highest-scoring class for the single item in the batch.
    scores = model.predict(spectra)
    predicted_index = np.argmax(scores, axis=-1)[0]
    return classes_names[predicted_index], actual_audio_path
# The app takes one uploaded audio clip to be recognised, plus the name of
# its actual speaker (passed to ``predict`` but not used for the prediction).
# Named ``inputs``/``outputs`` so the builtin ``input`` is not shadowed.
inputs = [
    gr.inputs.Audio(source="upload", type="filepath", label="Take audio sample"),
    gr.inputs.Textbox(label="Actual Speaker"),
]

# The app outputs the predicted speaker's name and the submitted audio.
outputs = [
    gr.outputs.Textbox(label="Predicted Speaker"),
    gr.outputs.Audio(label="Corresponding Audio"),
]

# It's good practice to pass examples, a description and a title to guide
# users. Each example is [audio file path, actual speaker].
examples = [
    ['audios/260.wav', 'Benjamin_Netanyau'],
    ['audios/611.wav', 'Jens_Stoltenberg'],
    ['audios/65.wav', 'Julia_Gillard'],
    ['audios/1083.wav', 'Magaret_Tarcher'],
    ['audios/605.wav', 'Nelson_Mandela'],
]
title = "Speaker Recognition"
description = "Select the noisy audio samples from examples to check whether the speaker recognised by the model is correct or not even in presence of noise !!!"
# Footer HTML; the two literals concatenate to the original single string.
article = (
    "<center>Space By: <u><a href='https://github.com/robotjellyzone'><b>Kavya Bisht</b></a></u> \n "
    "Based on <a href='https://keras.io/examples/audio/speaker_recognition_using_cnn/'><b>this notebook</b></a></center>"
)

# Build the interface and start serving it.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    live=True,
    allow_flagging=False,
    analytics_enabled=False,
    title=title,
    description=description,
    article=article,
).launch(enable_queue=True, debug=True)