Update app.py
app.py CHANGED
@@ -2,12 +2,22 @@ from funasr import AutoModel
 from funasr.utils.postprocess_utils import rich_transcription_postprocess
 from modelscope import snapshot_download
 
+import datetime
+import math
+import io
+import os
+import tempfile
 import json
+from typing import Optional
+
 import torch
 import gradio as gr
 
 from config import model_config
 
+
+
+
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 model_dir = snapshot_download(model_config['model_dir'])
 
@@ -16,8 +26,7 @@ model = AutoModel(
     trust_remote_code=False,
     remote_code="./model.py",
     vad_model="fsmn-vad",
-    punc_model="ct-punc",
-    spk_model="cam++",
+    punc_model="ct-punc",
     vad_kwargs={"max_single_segment_time": 15000},
     ncpu=torch.get_num_threads(),
     batch_size=1,
@@ -25,12 +34,13 @@ model = AutoModel(
     device=device,
 )
 
-def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc",
-                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
-                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
+def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", vad_kwargs='{"max_single_segment_time": 15000}',
+                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
+                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                      hotword=" ", ban_emo_unk=True):
     try:
         vad_kwargs = json.loads(vad_kwargs)
+
         temp_file_path = file_path
 
         res = model.generate(
@@ -46,18 +56,9 @@ def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_
             ban_emo_unk=ban_emo_unk
         )
 
-
-
-
-        for segment in segments:
-            start_time = segment["start"]
-            end_time = segment["end"]
-            speaker = segment.get("speaker", "unknown")
-            text = segment["text"]
-
-            transcription += f"[{start_time:.2f}s - {end_time:.2f}s] Speaker {speaker}: {text}\n"
-
-        return transcription
+        text = rich_transcription_postprocess(res[0]["text"])
+
+        return text
 
     except Exception as e:
         return str(e)
@@ -66,7 +67,6 @@ inputs = [
     gr.Audio(type="filepath"),
     gr.Textbox(value="fsmn-vad", label="VAD Model"),
     gr.Textbox(value="ct-punc", label="PUNC Model"),
-    gr.Textbox(value="cam++", label="SPK Model"),
     gr.Textbox(value='{"max_single_segment_time": 15000}', label="VAD Kwargs"),
     gr.Slider(1, 10, value=1, step=1, label="Batch Size"),
     gr.Textbox(value="auto", label="Language"),
@@ -82,8 +82,8 @@ inputs = [
 outputs = gr.Textbox(label="Transcription")
 
 gr.Interface(
-    fn=transcribe_audio,
-    inputs=inputs,
-    outputs=outputs,
-    title="ASR Transcription with
-).launch()
+    fn=transcribe_audio,
+    inputs=inputs,
+    outputs=outputs,
+    title="ASR Transcription with FunASR"
+).launch()
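
As a reading aid for the new return path (which replaces the old per-segment speaker loop), a minimal sketch of the post-processing pattern the updated transcribe_audio now follows could look like this; the input file name "sample.wav" and the reuse of the module-level model are illustrative assumptions, not part of this commit:

    # Hypothetical sketch of the step this commit switches to: take the first
    # result from model.generate and clean it with rich_transcription_postprocess
    # instead of assembling per-speaker segment lines.
    res = model.generate(
        input="sample.wav",   # assumed example file for illustration
        language="auto",
        use_itn=True,
        batch_size_s=60,
        merge_vad=True,
        merge_length_s=15,
    )
    text = rich_transcription_postprocess(res[0]["text"])
    print(text)

transcribe_audio wraps this pattern in a try/except and returns the exception text instead of raising when generation fails.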