import gradio as gr
import json
import pandas as pd
import collections
import scipy.signal
import numpy as np
from functools import partial
from openwakeword.model import Model

# Sample rate (Hz) that incoming audio is converted to before prediction.
TARGET_SAMPLE_RATE = 16000
# Number of samples handed to the model per prediction (80 ms at 16 kHz).
FRAME_SIZE = 1280
# Number of most recent scores kept (and plotted) per model.
HISTORY_LENGTH = 60

# Load openWakeWord models
model = Model()


# Define function to process audio
# NOTE(review): the mutable default for `state` is kept on purpose. Gradio's
# Interface appears to use the default of a "state" parameter as the initial
# per-session state value -- confirm against the installed Gradio version
# before replacing it with None.
def process_audio(
    audio,
    state=collections.defaultdict(partial(collections.deque, maxlen=HISTORY_LENGTH)),
):
    """Score one streamed audio chunk and return an updated score plot.

    Args:
        audio: ``(sample_rate, samples)`` pair; ``samples`` is a NumPy array.
        state: Mapping from model name to a bounded deque holding the most
            recent prediction scores for that model.

    Returns:
        A ``(plot, state)`` tuple: the line-plot update containing the score
        history of every model, and the updated history mapping.
    """
    sample_rate, data = audio

    # Resample audio to 16khz if needed. Audio that is already at the target
    # rate is used as-is (previously `data` was left undefined in that case,
    # which raised UnboundLocalError in the loop below).
    if sample_rate != TARGET_SAMPLE_RATE:
        n_target = int(float(data.shape[0]) / sample_rate * TARGET_SAMPLE_RATE)
        data = scipy.signal.resample(data, n_target)

    # Get predictions, one full frame at a time. A trailing partial frame is
    # skipped, exactly as before.
    for start in range(0, len(data), FRAME_SIZE):
        chunk = data[start:start + FRAME_SIZE]
        if len(chunk) != FRAME_SIZE:
            continue

        prediction = model.predict(chunk)
        for key in prediction:
            # Fill deque with zeros if it's empty so every model's curve
            # spans the full plot width from the first frame on.
            if len(state[key]) == 0:
                state[key].extend(np.zeros(HISTORY_LENGTH))

            # Add prediction
            state[key].append(prediction[key])

    # Make line plot: one long-format frame per model, stacked together.
    dfs = [
        pd.DataFrame(
            {"x": np.arange(len(state[key])), "y": state[key], "Model": key}
        )
        for key in state
    ]
    df = pd.concat(dfs)

    plot = gr.LinePlot().update(
        value=df,
        x='x',
        y='y',
        color="Model",
        y_lim=(0, 1),
        tooltip="Model",
        width=600,
        height=300,
        x_title="Time (frames)",
        y_title="Model Score",
        color_legend_position="bottom",
    )

    # Manually adjust how the legend is displayed by patching the serialized
    # plot specification.
    tmp = json.loads(plot["value"]["plot"])
    tmp["layer"][0]['encoding']['color']['legend'].update(
        {
            "direction": "vertical",
            "columns": 4,
            "labelFontSize": 12,
            "titleFontSize": 14,
        }
    )
    plot["value"]['plot'] = json.dumps(tmp)

    return plot, state


# Create Gradio interface and launch
gr_int = gr.Interface(
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state",
    ],
    outputs=[
        gr.LinePlot(show_label=False),
        "state",
    ],
    live=True,
)

gr_int.launch()