# Spaces: Runtime error
import io
import json
import logging
import os
import tempfile
from typing import Optional

import gradio as gr
import torch
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
from modelscope import snapshot_download

from config import model_config

# Pick the inference device: the first CUDA device when torch reports one,
# otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Resolve the model named in the project config; the returned value is what
# AutoModel receives as its `model` argument below.
model_dir = snapshot_download(model_config['model_dir'])

# Options for the single shared AutoModel instance. It is built once at import
# time and reused by every transcription request.
# NOTE(review): `remote_code` is set while `trust_remote_code` is False --
# confirm whether ./model.py is actually meant to be loaded.
_automodel_options = {
    "model": model_dir,
    "trust_remote_code": False,
    "remote_code": "./model.py",
    "vad_model": "fsmn-vad",
    "punc_model": "ct-punc",
    "vad_kwargs": {"max_single_segment_time": 30000},
    "ncpu": torch.get_num_threads(),
    "batch_size": 1,
    "hub": "hf",
    "device": device,
}
model = AutoModel(**_automodel_options)

def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", vad_kwargs='{"max_single_segment_time": 30000}',
                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                     hotword=" ", spk_model="cam++", ban_emo_unk=False):
    """Transcribe one audio file with the module-level ``model``.

    This is the Gradio callback, so it never raises: every failure is turned
    into a readable string that ends up in the output textbox.

    Args:
        file_path: Path of the audio file to transcribe. ``None`` or an empty
            string (no clip supplied) yields a "no audio" message.
        vad_model: Accepted so the signature lines up with the UI inputs;
            not used here because ``model`` is already constructed at import
            time.
        punc_model: Same as ``vad_model`` -- accepted but not used.
        vad_kwargs: JSON text (or an already-parsed dict) of VAD settings.
            Only validated; the parsed value is not forwarded to the model.
        batch_size: Accepted but not used, as for ``vad_model``.
        language, use_itn, batch_size_s, merge_vad, merge_length_s,
        batch_size_threshold_s, hotword, spk_model, ban_emo_unk: Forwarded
            unchanged as keyword arguments to ``model.generate``.

    Returns:
        The post-processed transcription, an empty string when the model
        returns no results, or an error message when something fails.
    """
    # Submitting the form without a clip would otherwise surface an opaque
    # internal error from the model instead of an actionable message.
    if not file_path:
        return "No audio provided. Please upload or record an audio clip."

    # Malformed JSON is still rejected, as before, but with a message that
    # names the offending field. Blank text and non-string values (e.g. a
    # dict from a programmatic caller) are accepted because the value is not
    # used further.
    # NOTE(review): the parsed settings never reach the model; confirm whether
    # per-request VAD overrides were intended.
    if isinstance(vad_kwargs, str) and vad_kwargs.strip():
        try:
            json.loads(vad_kwargs)
        except json.JSONDecodeError as exc:
            return f"Invalid VAD Kwargs JSON: {exc}"

    try:
        res = model.generate(
            input=file_path,
            cache={},
            language=language,
            use_itn=use_itn,
            batch_size_s=batch_size_s,
            merge_vad=merge_vad,
            merge_length_s=merge_length_s,
            batch_size_threshold_s=batch_size_threshold_s,
            hotword=hotword,
            spk_model=spk_model,
            ban_emo_unk=ban_emo_unk,
        )
        # An empty result list means there is nothing to show; do not let it
        # leak out as "list index out of range".
        if not res:
            return ""
        return rich_transcription_postprocess(res[0]["text"])
    except Exception as e:
        # UI boundary: keep the traceback in the logs for debugging, and show
        # the message to the user (falling back to the exception type when
        # the message is empty).
        logging.getLogger(__name__).exception("Transcription failed for %r", file_path)
        return str(e) or type(e).__name__

# Input widgets, listed in the same order as the positional parameters of
# transcribe_audio (audio path first, then the tuning options).
inputs = [
    gr.Audio(type="filepath"),
    # Model/VAD configuration fields.
    gr.Textbox(label="VAD Model", value="fsmn-vad"),
    gr.Textbox(label="PUNC Model", value="ct-punc"),
    gr.Textbox(label="VAD Kwargs", value='{"max_single_segment_time": 30000}'),
    gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Batch Size"),
    # Decoding options.
    gr.Textbox(label="Language", value="auto"),
    gr.Checkbox(label="Use ITN", value=True),
    gr.Slider(minimum=30, maximum=120, value=60, step=1, label="Batch Size (seconds)"),
    gr.Checkbox(label="Merge VAD", value=True),
    gr.Slider(minimum=5, maximum=60, value=15, step=1, label="Merge Length (seconds)"),
    gr.Slider(minimum=10, maximum=100, value=50, step=1, label="Batch Size Threshold (seconds)"),
    gr.Textbox(label="Hotword", value=" "),
    gr.Textbox(label="Speaker Model", value="cam++"),
    gr.Checkbox(label="Ban Emotional Unknown", value=False),
]

# Single text output that receives whatever transcribe_audio returns.
outputs = gr.Textbox(label="Transcription")

# Build the interface and start serving it as soon as the script runs.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=inputs,
    outputs=outputs,
    title="ASR Transcription with FunASR",
)
demo.launch()