"""Gradio live demo for openWakeWord pre-trained wake-word models.

Streams microphone audio into every loaded model and plots each model's
score over time, so a spike is visible when a wake word/phrase is spoken.
"""

import collections
import json
import importlib.resources as ir
from functools import partial

import gradio as gr
import numpy as np
import pandas as pd
import scipy.signal

import openwakeword.utils  # required for download_models() below
from openwakeword.model import Model

# One-time download of all pre-trained models (or only select models).
# Skipped when the resources directory already exists from a prior run.
with ir.path('openwakeword', 'resources') as oir:
    if not oir.is_dir():
        openwakeword.utils.download_models()

# Load openWakeWord models
model = Model(
    inference_framework="onnx",
    # wakeword_models=["borah_da.onnx"],  # can also leave this argument empty to load all of the included pre-trained models
)


def process_audio(audio, state=None):
    """Score one streamed audio chunk with every loaded model.

    Parameters
    ----------
    audio : tuple
        ``(sample_rate, samples)`` as delivered by ``gr.Audio(streaming=True)``.
    state : defaultdict or None
        Per-session mapping of model name -> deque(maxlen=60) of recent
        scores. Built lazily on the first call: a mutable default argument
        would be created once and shared across every session/user.

    Returns
    -------
    tuple
        (LinePlot update dict, updated ``state``).
    """
    if state is None:
        state = collections.defaultdict(partial(collections.deque, maxlen=60))

    # Resample audio to 16 kHz if needed; otherwise use the samples as-is.
    # (Without the else branch, 16 kHz input would leave `data` unbound.)
    if audio[0] != 16000:
        data = scipy.signal.resample(
            audio[1], int(float(audio[1].shape[0]) / audio[0] * 16000)
        )
    else:
        data = audio[1]

    # Get predictions on 1280-sample (80 ms) frames
    for i in range(0, data.shape[0], 1280):
        if len(data.shape) == 2 or data.shape[-1] == 2:
            chunk = data[i:i+1280][:, 0]  # just get one channel of audio
        else:
            chunk = data[i:i+1280]

        if chunk.shape[0] == 1280:  # skip the trailing partial frame
            prediction = model.predict(chunk)
            for key in prediction:
                # Fill deque with zeros if it's empty so the plot always
                # spans the full 60-frame window
                if len(state[key]) == 0:
                    state[key].extend(np.zeros(60))

                # Add prediction
                state[key].append(prediction[key])

    # Make line plot: long-form dataframe, one row per (frame, model) score
    dfs = []
    for key in state.keys():
        df = pd.DataFrame({"x": np.arange(len(state[key])), "y": state[key], "Model": key})
        dfs.append(df)

    df = pd.concat(dfs)
    plot = gr.LinePlot().update(
        value=df, x='x', y='y', color="Model", y_lim=(0, 1),
        tooltip="Model", width=600, height=300,
        x_title="Time (frames)", y_title="Model Score",
        color_legend_position="bottom",
    )

    # Manually adjust how the legend is displayed (Vega-Lite spec tweak)
    tmp = json.loads(plot["value"]["plot"])
    legend = tmp["layer"][0]['encoding']['color']['legend']
    legend["direction"] = "vertical"
    legend["columns"] = 4
    legend["labelFontSize"] = 12
    legend["titleFontSize"] = 14
    plot["value"]['plot'] = json.dumps(tmp)

    return plot, state


# Create Gradio interface and launch
desc = """
This is a demo of the pre-trained models included in the latest release of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.

Click on the "record from microphone" button below to start capturing. The real-time scores from each model will be shown in the line plot. Hover over each line to see the name of the corresponding model.

Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details). If everything is working properly, you should see a spike in the score for a given model after speaking a related word/phrase. Below are some suggested phrases to try!

| Model Name | Word/Phrase |
| --- | --- |
| bora da | "bora da" |
"""

gr_int = gr.Interface(
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
    fn=process_audio,
    inputs=[
        gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False),
        "state"
    ],
    outputs=[
        gr.LinePlot(show_label=False),
        "state"
    ],
    live=True)

gr_int.launch()