Spaces:
Runtime error
Runtime error
import gradio as gr | |
import pandas as pd | |
import json | |
import collections | |
import scipy.signal | |
import numpy as np | |
from functools import partial | |
import importlib.resources as ir | |
import openwakeword.utils | |
from openwakeword.model import Model | |
# Fetch the pre-trained model files once: the download only runs when the
# package's "resources" directory does not exist yet.
with ir.path('openwakeword', 'resources') as oir:
    if not oir.is_dir():
        openwakeword.utils.download_models()

# Build the openWakeWord model on the ONNX runtime, restricted to the wake
# words below (omitting ``wakeword_models`` would instead load all of the
# included pre-trained models).
model = Model(
    wakeword_models=[
        "borah_da.onnx",
        "prin_hawn_da_max_en.onnx",
        "max_en.onnx",
    ],
    inference_framework="onnx",
)
# Audio is scored at 16 kHz in frames of 1280 samples (80 ms at that rate);
# each model keeps a rolling window of its 60 most recent scores for the plot.
_TARGET_SAMPLE_RATE = 16000
_FRAME_SAMPLES = 1280
_HISTORY_LEN = 60


# Define function to process audio
def process_audio(audio, state=None):
    """Score one streamed audio buffer and return the refreshed score plot.

    Args:
        audio: ``(sample_rate, samples)`` pair, where ``samples`` is a NumPy
            array that is either 1-D or 2-D with one column per channel.
            ``None`` is treated as "no new audio".
        state: Mapping of model name -> ``deque`` of recent scores, carried
            between calls. ``None`` (the default) starts a fresh history, so
            separate callers never share one mutable default object.

    Returns:
        A ``(plot_update, state)`` tuple: the line-plot update to display and
        the score history to hand back on the next call.
    """
    if state is None:
        state = collections.defaultdict(
            partial(collections.deque, maxlen=_HISTORY_LEN)
        )

    if audio is not None:
        sample_rate, data = audio

        # Resample audio to 16khz if needed; audio already at 16 kHz is used
        # unchanged (previously `data` was never assigned in that case).
        if sample_rate != _TARGET_SAMPLE_RATE:
            # NOTE(review): scipy's resample yields floating-point samples;
            # confirm the model accepts them or cast back to the input dtype.
            n_target = int(float(data.shape[0]) / sample_rate * _TARGET_SAMPLE_RATE)
            data = scipy.signal.resample(data, n_target)

        # Just get one channel of audio when several are present
        if data.ndim == 2:
            data = data[:, 0]

        # Get predictions for every complete frame; a trailing partial frame
        # is dropped, as before.
        last_start = data.shape[0] - _FRAME_SAMPLES
        for start in range(0, last_start + 1, _FRAME_SAMPLES):
            prediction = model.predict(data[start:start + _FRAME_SAMPLES])
            for key, score in prediction.items():
                history = state[key]
                # Fill deque with zeros if it's empty so the plot always
                # spans the full window
                if not history:
                    history.extend(np.zeros(_HISTORY_LEN))
                # Add prediction
                history.append(score)

    # Nothing has been scored yet (e.g. the first buffer was shorter than one
    # frame): leave the plot untouched instead of failing in pd.concat([]).
    if not state:
        return gr.update(), state

    # Make line plot: one series per model, x = position inside the window
    df = pd.concat(
        [
            pd.DataFrame(
                {"x": np.arange(len(scores)), "y": list(scores), "Model": key}
            )
            for key, scores in state.items()
        ]
    )
    plot = gr.LinePlot().update(
        value=df, x='x', y='y', color="Model", y_lim=(0, 1), tooltip="Model",
        width=600, height=300, x_title="Time (frames)", y_title="Model Score",
        color_legend_position="bottom",
    )

    # Manually adjust how the legend is displayed by patching the serialized
    # chart spec carried inside the update payload
    spec = json.loads(plot["value"]["plot"])
    spec["layer"][0]['encoding']['color']['legend'].update(
        direction="vertical",
        columns=4,
        labelFontSize=12,
        titleFontSize=14,
    )
    plot["value"]['plot'] = json.dumps(spec)

    return plot, state
# Markdown description rendered on the demo page
desc = """
This is a demo of the pre-trained models included in the latest release
of the [openWakeWord](https://github.com/dscripka/openWakeWord) library.
Click on the "record from microphone" button below to start capturing.
The real-time scores from each model will be shown in the line plot. Hover over
each line to see the name of the corresponding model.
Different models will respond to different wake words/phrases (see [the model docs](https://github.com/dscripka/openWakeWord/tree/main/docs/models) for more details).
If everything is working properly,
you should see a spike in the score for a given model after speaking a related word/phrase. Below are some suggested phrases to try!
| Model Name | Word/Phrase |
| --- | --- |
| borah_da | "bora da" |
| max_en | "Macsen" |
| prin_hawn_da_max_en | "Prynhawn da, Macsen"|
"""

# Streaming microphone input delivered to the callback as NumPy data, and the
# line plot that displays the per-model scores
microphone = gr.Audio(source="microphone", type="numpy", streaming=True, show_label=False)
score_plot = gr.LinePlot(show_label=False)

# Wire the callback to its components; the paired "state" entries carry the
# score history from one call of process_audio to the next
gr_int = gr.Interface(
    fn=process_audio,
    inputs=[microphone, "state"],
    outputs=[score_plot, "state"],
    live=True,
    title="openWakeWord Live Demo",
    description=desc,
    css=".flex {flex-direction: column} .gr-panel {width: 100%}",
)

# Start serving the demo
gr_int.launch()