Spaces:

tykiww
/

diarize_and_transcribe

Sleeping

File size: 4,637 Bytes

0cc20f3
7e338ce
36f1349
0cc20f3
 
adf1205
f390e12
5650727
b91b901
afa0cd6
 
adf1205
f390e12
0cc20f3
32c34a8
 
 
 
 
9ae2604
1c763ac
0cc20f3
afa0cd6
 
0cc20f3
 
afa0cd6
 
70762cf
0cc20f3
70762cf
dee336f
adf1205
0cc20f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f92dc6
 
 
 
 
 
0cc20f3
7e338ce
adafc31
8f92dc6
0cc20f3
adafc31
0cc20f3
ea58d27
8f92dc6
 
0cc20f3
8f92dc6
 
7e338ce
 
adf1205
6b5ce4a
adf1205
 
 
 
 
 
004be7e
1829484
adf1205
 
 
 
77b5e94
7f2009a
004be7e
8f92dc6
6b5ce4a
004be7e
6b5ce4a
adf1205
adafc31
004be7e
 
77b5e94
adafc31
0cc20f3
 
 
8f92dc6
0cc20f3
 
 
 
 
 
 
 
adafc31
004be7e
 
0cc20f3
 
adf1205
004be7e
 
0cc20f3
 
 
 
 
ea58d27
adafc31
0cc20f3
adf1205
 
6b5ce4a
adf1205
 
 
 
 
5650727
adf1205
0cc20f3
5650727
 
0cc20f3
b91b901
0cc20f3
adf1205

import os
import tempfile
import gradio as gr 
import pandas as pd

from utilities.setup import get_files
#import spaces
from services.diarization import Diarizer
from services.asr import Transcriber
from speechbox import ASRDiarizationPipeline


#@spaces.GPU
def process_meeting(audio_input, num_speakers):
    """
    Diarize and transcribe a recording into a speaker-labelled transcript.

    audio_input: filepath --> str
    num_speakers: number --> int

    Returns one line per segment produced by the pipeline, formatted as
    "<speaker>: <text>" and terminated by a newline.

    Raises gr.Error when no audio has been provided.
    """
    # The Audio component yields None when nothing was uploaded or recorded;
    # fail with a readable message instead of an error deep in the pipeline.
    if not audio_input:
        raise gr.Error("Please upload or record an audio file before processing.")

    print(audio_input)

    # `diarizer` and `transcriber` are module-level objects created in the
    # __main__ block before the interface is launched.
    diarization_pipeline = diarizer.get_pipeline()
    asr_pipeline = transcriber.get_pipeline()

    # Pass both into speechbox for prediction and cleaning
    pipeline = ASRDiarizationPipeline(
        asr_pipeline=asr_pipeline,
        diarization_pipeline=diarization_pipeline,
    )
    segments = pipeline(audio_input, num_speakers=num_speakers)

    # One "<speaker>: <text>" line per segment.
    return "".join(
        f"{segment['speaker']}: {segment['text']}\n" for segment in segments
    )


def click_message():
    """Return the status text shown in the progress box after clicking Process."""
    status = "Results loading. Go to next page!"
    return status


def default_table():
    """Return the initial speaker table: five default labels with blank Name/Title."""
    default_labels = ["SPEAKER_00", "SPEAKER_01", "SPEAKER_02", "SPEAKER_03", "SPEAKER_04"]
    row_count = len(default_labels)

    columns = {}
    columns["Default"] = default_labels
    # Name and Title start empty; the user fills them in through the UI.
    columns["Name"] = [""] * row_count
    columns["Title"] = [""] * row_count
    return pd.DataFrame(columns)

def tempfile_generator():
    """Create an empty, uniquely named .txt temporary file and return its path.

    The file is created with delete=False, so it remains on disk after it is
    closed and the caller can reopen it by name.
    """
    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as handle:
        # Leaving the with-block closes the handle; the file itself is kept.
        return handle.name


def substitute_names(speaker_names, num_speakers, text):
    """
    Replace default speaker labels in a transcript and save it for download.

    speaker_names: table with columns Default, Name, Title --> pd.DataFrame
    num_speakers: number --> int (only the first num_speakers rows are applied)
    text: transcript containing the default speaker labels --> str

    Returns the path of a temporary .txt file holding the relabelled
    transcript, so the caller can offer it as a download.
    """
    def _clean(cell):
        # Cleared cells may arrive as None/NaN rather than an empty string.
        return "" if pd.isna(cell) else str(cell).strip()

    # Slice the DataFrame itself: itertuples() yields an iterator, which
    # cannot be sliced. Take num_speakers rows so the last speaker is kept.
    rows = speaker_names.head(int(num_speakers)).itertuples(index=False, name=None)
    for default, name, title in rows:
        default, name, title = _clean(default), _clean(name), _clean(title)
        if not default:
            # Replacing "" would insert the label between every character.
            continue

        # Keep the default label when no name was entered, so the speaker
        # is not erased from the transcript.
        label = name or default
        if title:
            label += f" ({title})"
        if label != default:
            text = text.replace(default, label)

    # Make file downloadable
    temp_file_name = tempfile_generator()
    with open(temp_file_name, "w", encoding="utf-8") as file:
        file.write(text)

    return temp_file_name


def main(conf):
    """
    Build the three-tab Gradio interface, wire its buttons and launch it.

    conf: configuration mapping --> dict; reads conf["layout"]["page_names"],
          conf["layout"]["about"], conf["session"]["min_speakers"] and
          conf["session"]["max_speakers"]
    """
    layout_cfg = conf["layout"]
    session_cfg = conf["session"]
    tab_titles = layout_cfg["page_names"]
    fewest_speakers = session_cfg["min_speakers"]
    speaker_choices = list(range(fewest_speakers, session_cfg["max_speakers"] + 1))

    with gr.Blocks(theme=gr.themes.Soft(text_size="lg")) as demo:

        # Tab 1: title, banner image and the "about" text.
        with gr.TabItem(tab_titles[0]):
            gr.Markdown("# 🎤 Non-Video Meeting Transcription and Speaker Diarization")
            gr.Markdown("![](file/microphone_pen_and_paper.png)")
            about_text = get_files.load_markdown_file(layout_cfg["about"])
            gr.Markdown(about_text)

        # Tab 2: audio input, speaker count and the Process button.
        with gr.TabItem(tab_titles[1]):
            gr.Markdown("# 🔊 Upload or record your meeting")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            num_speakers = gr.Dropdown(
                choices=speaker_choices,
                label="Number of Speakers",
                value=fewest_speakers,
            )
            process_button = gr.Button("Process")
            output_box = gr.Textbox(label="Progress")

        # Tab 3: transcript review, speaker renaming and download.
        with gr.TabItem(tab_titles[2]):
            gr.Markdown("# 📄 View and download your meeting transcript")
            transcription_output = gr.Textbox(label="Transcription Review")
            speaker_names = gr.Dataframe(
                label="Match output names to desired names and titles/responsibility. Only enter values for Name and Title",
                headers=["Default", "Name", "Title"],
                datatype=["str", "str"],
                row_count=(5, "fixed"),
                col_count=(3, "fixed"),
                type="pandas",
                value=default_table(),
            )
            fix_button = gr.Button("Fix and Prepare Download")
            label_file_link = gr.File(label="Download Cleaned Transcript")

        # The Process button has two handlers: one posts a status message,
        # the other runs diarization + transcription.
        process_button.click(fn=click_message, outputs=output_box)
        process_button.click(
            fn=process_meeting,
            inputs=[audio_input, num_speakers],
            outputs=[transcription_output],
        )

        # The Fix button applies the name table to the transcript.
        fix_button.click(
            fn=substitute_names,
            inputs=[speaker_names, num_speakers, transcription_output],
            outputs=[label_file_link],
        )

    # NOTE(review): allowed_paths=["/"] appears to let Gradio serve any file
    # the process can read -- confirm this breadth is intended.
    demo.launch(server_name="0.0.0.0", allowed_paths=["/"])




if __name__ == "__main__":
    # Load the application configuration via get_files.json_cfg(); the same
    # object is handed to both services and to the UI builder.
    conf = get_files.json_cfg()
    
    # `diarizer` and `transcriber` are deliberately module-level names:
    # process_meeting() reads them as globals, so they must be created
    # before main() launches the interface.
    diarizer = Diarizer(conf)
    
    transcriber = Transcriber(conf)
    # Build the Gradio interface and launch it.
    main(conf)