# Spaces:
# Runtime error
# Runtime error
from funasr import AutoModel | |
from funasr.utils.postprocess_utils import rich_transcription_postprocess | |
from modelscope import snapshot_download | |
import datetime | |
import math | |
import io | |
import os | |
import tempfile | |
import json | |
from typing import Optional | |
import torch | |
import gradio as gr | |
from config import model_config | |
# `spaces` is an optional dependency: record whether it could be imported so
# that later code can decide whether to route functions through `spaces.GPU`.
try:
    import spaces
except ImportError:
    USING_SPACES = False
else:
    USING_SPACES = True
def gpu_decorator(func):
    """Return ``func`` wrapped by ``spaces.GPU`` when ``spaces`` is importable.

    When the optional ``spaces`` package was not found at import time
    (``USING_SPACES`` is false), ``func`` is handed back unchanged.
    """
    return spaces.GPU(func) if USING_SPACES else func
# Use the first CUDA device when torch reports one; otherwise run on CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# The model identifier comes from the project's `config.model_config`;
# `snapshot_download` (modelscope) returns the value passed on as `model=`.
model_dir = snapshot_download(model_config["model_dir"])

# Single module-level model instance shared by every transcription request.
# NOTE(review): `vad_kwargs` is supplied but no `vad_model` is passed here —
# confirm whether the VAD settings actually take effect.
model = AutoModel(
    model=model_dir,
    vad_kwargs={"max_single_segment_time": 15000},
    ncpu=torch.get_num_threads(),
    batch_size=1,
    hub="hf",
    device=device,
)
def transcribe_audio(file_path, vad_model="fsmn-vad", vad_kwargs='{"max_single_segment_time": 15000}',
                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                     hotword=" ", ban_emo_unk=True):
    """Transcribe one audio file with the module-level ``model``.

    The parameter list mirrors the UI widgets one-to-one, because the values
    are passed positionally by the interface.

    Args:
        file_path: Path of the audio file to transcribe. A missing value
            (``None`` or ``""``) short-circuits with an explanatory message.
        vad_model: Accepted for signature compatibility; not used here.
        vad_kwargs: JSON text. It is only validated (must parse as JSON); the
            parsed value is not forwarded to the model.
        batch_size: Accepted for signature compatibility; not used here.
        language, use_itn, batch_size_s, merge_vad, merge_length_s,
        batch_size_threshold_s, hotword, ban_emo_unk: Forwarded unchanged as
            keyword arguments to ``model.generate``.

    Returns:
        The first element of the ``model.generate`` result on success.
        On any failure (missing audio, invalid ``vad_kwargs`` JSON, an
        exception raised by the model, or an empty result) a human-readable
        message string is returned instead of raising, so the UI can show it.
    """
    # Without an input path there is nothing to run; answer directly rather
    # than letting the model fail with a less helpful error.
    if not file_path:
        return "No audio input provided."

    try:
        # Validation only: malformed JSON is reported back to the user.
        json.loads(vad_kwargs)

        res = model.generate(
            input=file_path,
            cache={},
            language=language,
            use_itn=use_itn,
            batch_size_s=batch_size_s,
            merge_vad=merge_vad,
            merge_length_s=merge_length_s,
            batch_size_threshold_s=batch_size_threshold_s,
            hotword=hotword,
            ban_emo_unk=ban_emo_unk,
        )

        # An empty result would otherwise surface as "list index out of range".
        if not res:
            return "No transcription result returned."

        # NOTE(review): this returns the raw first result entry; the imported
        # `rich_transcription_postprocess` is never applied — confirm whether
        # the UI should show post-processed text instead.
        return res[0]
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return str(e)
# Input widgets, listed in the same order as the parameters of
# `transcribe_audio`: the interface passes component values positionally.
inputs = [
    gr.Audio(type="filepath"),
    gr.Textbox(value="fsmn-vad", label="VAD Model"),
    gr.Textbox(value='{"max_single_segment_time": 15000}', label="VAD Kwargs"),
    gr.Slider(1, 10, value=1, step=1, label="Batch Size"),
    gr.Textbox(value="auto", label="Language"),
    gr.Checkbox(value=True, label="Use ITN"),
    gr.Slider(30, 120, value=60, step=1, label="Batch Size (seconds)"),
    gr.Checkbox(value=True, label="Merge VAD"),
    gr.Slider(5, 60, value=15, step=1, label="Merge Length (seconds)"),
    gr.Slider(10, 100, value=50, step=1, label="Batch Size Threshold (seconds)"),
    gr.Textbox(value=" ", label="Hotword"),
    gr.Checkbox(value=True, label="Ban Emotional Unknown"),
]

outputs = gr.Textbox(label="Transcription")

# Route the callback through `gpu_decorator` so that, when the `spaces`
# package is available, the inference function is registered via `spaces.GPU`.
# Without `spaces`, `gpu_decorator` returns the function unchanged.
demo = gr.Interface(
    fn=gpu_decorator(transcribe_audio),
    inputs=inputs,
    outputs=outputs,
    title="ASR Transcription with FunASR",
)
demo.launch()