File size: 4,637 Bytes
0cc20f3
7e338ce
36f1349
0cc20f3
 
adf1205
5650727
 
b91b901
afa0cd6
 
adf1205
 
0cc20f3
32c34a8
 
 
 
 
9ae2604
1c763ac
0cc20f3
afa0cd6
 
0cc20f3
 
afa0cd6
 
70762cf
0cc20f3
70762cf
dee336f
adf1205
0cc20f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8f92dc6
 
 
 
 
 
0cc20f3
7e338ce
adafc31
8f92dc6
0cc20f3
adafc31
0cc20f3
ea58d27
8f92dc6
 
0cc20f3
8f92dc6
 
7e338ce
 
adf1205
6b5ce4a
adf1205
 
 
 
 
 
004be7e
1829484
adf1205
 
 
 
77b5e94
7f2009a
004be7e
8f92dc6
6b5ce4a
004be7e
6b5ce4a
adf1205
adafc31
004be7e
 
77b5e94
adafc31
0cc20f3
 
 
8f92dc6
0cc20f3
 
 
 
 
 
 
 
adafc31
004be7e
 
0cc20f3
 
adf1205
004be7e
 
0cc20f3
 
 
 
 
ea58d27
adafc31
0cc20f3
adf1205
 
6b5ce4a
adf1205
 
 
 
 
5650727
adf1205
0cc20f3
5650727
 
0cc20f3
b91b901
0cc20f3
adf1205
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import tempfile
import gradio as gr 
import pandas as pd

from utilities.setup import get_files
#import spaces
from services.diarization import Diarizer
from services.asr import Transcriber
from speechbox import ASRDiarizationPipeline


#@spaces.GPU
def process_meeting(audio_input, num_speakers):
    """Transcribe a meeting recording and label every segment with its speaker.

    Builds a speechbox ``ASRDiarizationPipeline`` from the module-level
    ``diarizer`` and ``transcriber`` objects (created in the ``__main__``
    block), runs it on the audio and flattens the result into plain text.

    Args:
        audio_input: Path to the audio file (the Gradio ``Audio`` input is
            configured with ``type="filepath"``).
        num_speakers: Number of speakers, forwarded to the pipeline call.

    Returns:
        The transcript as a string with one ``"<speaker>: <text>"`` line per
        segment, each line terminated by a newline.
    """
    print(audio_input)

    # Get diarization and transcription pipelines
    diarization_pipeline = diarizer.get_pipeline()
    asr_pipeline = transcriber.get_pipeline()

    # Pass them into speechbox for prediction and cleaning
    pipeline = ASRDiarizationPipeline(
        asr_pipeline=asr_pipeline,
        diarization_pipeline=diarization_pipeline,
    )

    output = pipeline(audio_input, num_speakers=num_speakers)

    # Each segment is read through its 'speaker' and 'text' keys; join once
    # instead of growing a string with repeated concatenation.
    return "".join(
        f"{segment['speaker']}: {segment['text']}\n" for segment in output
    )


def click_message():
    """Return the status text displayed once processing has been triggered."""
    status = "Results loading. Go to next page!"
    return status


def default_table():
    """Build the initial speaker-mapping table.

    Returns:
        A DataFrame with five rows and the columns ``Default``, ``Name`` and
        ``Title``. ``Default`` holds the labels ``SPEAKER_00`` through
        ``SPEAKER_04``; the other two columns contain empty strings.
    """
    row_total = 5
    placeholder_labels = [f"SPEAKER_{idx:02d}" for idx in range(row_total)]
    columns = {
        "Default": placeholder_labels,
        "Name": [""] * row_total,
        "Title": [""] * row_total,
    }
    return pd.DataFrame(columns)

def tempfile_generator():
    """Create an empty ``.txt`` temporary file and return its path.

    The file is created with ``delete=False``, so it remains on disk after
    its handle is closed; the caller writes to it by path afterwards.
    """
    # Leaving the ``with`` block closes the handle; the file itself persists.
    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as handle:
        return handle.name


def _cell_text(value):
    """Return a table cell as stripped text, treating None/NaN as empty."""
    if value is None or pd.isna(value):
        return ""
    return str(value).strip()


def substitute_names(speaker_names, num_speakers, text):
    """Replace default speaker labels in a transcript and save it to a file.

    Args:
        speaker_names: pandas DataFrame whose rows are
            ``(default label, name, title)``.
        num_speakers: Number of speakers; the first ``num_speakers`` rows of
            ``speaker_names`` are applied.
        text: Transcript text containing the default labels.

    Returns:
        Path of the temporary ``.txt`` file holding the cleaned transcript,
        so the bound ``gr.File`` output can offer it for download.
    """
    # ``itertuples`` yields an iterator that cannot be sliced, so limit the
    # rows on the DataFrame itself -- and keep all ``num_speakers`` rows
    # rather than dropping the last one.
    rows = speaker_names.head(int(num_speakers)).itertuples(index=False, name=None)
    for default, name, title in rows:
        name = _cell_text(name)
        title = _cell_text(title)
        if not name and not title:
            # Nothing entered for this speaker: keep the default label.
            continue
        suffix = f" ({title})" if title else ""
        # Fall back to the default label when only a title was provided, so
        # the speaker prefix never disappears from the transcript.
        text = text.replace(default, f"{name or default}{suffix}")

    # Make file downloadable
    temp_file_name = tempfile_generator()
    # Explicit UTF-8 so non-ASCII names are written identically on any host.
    with open(temp_file_name, "w", encoding="utf-8") as file:
        file.write(text)

    return temp_file_name


def main(conf):
    """Build the three-tab Gradio interface and launch the server.

    Args:
        conf: Configuration mapping. The keys read here are
            ``conf["layout"]["page_names"]`` (three tab titles),
            ``conf["layout"]["about"]`` (markdown file for the first tab) and
            ``conf["session"]["min_speakers"]`` / ``["max_speakers"]``
            (inclusive bounds of the speaker-count dropdown).
    """

    with gr.Blocks(theme=gr.themes.Soft(text_size="lg")) as demo:

        # Tab 1: description of the application
        with gr.TabItem(conf["layout"]["page_names"][0]):

            gr.Markdown("# 🎀 Non-Video Meeting Transcription and Speaker Diarization")
            gr.Markdown("![](file/microphone_pen_and_paper.png)")
            gr.Markdown(get_files.load_markdown_file(conf["layout"]["about"]))

        # Tab 2: audio input and processing trigger
        with gr.TabItem(conf["layout"]["page_names"][1]):
            gr.Markdown("# 🔊 Upload or record your meeting")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            num_speakers = gr.Dropdown(list(range(conf["session"]["min_speakers"],
                                                  conf["session"]["max_speakers"]+1)),
                                       label="Number of Speakers",
                                       value=conf["session"]["min_speakers"])

            process_button = gr.Button("Process")
            output_box = gr.Textbox(label="Progress")

        # Tab 3: transcript review, speaker renaming and download
        with gr.TabItem(conf["layout"]["page_names"][2]):
            gr.Markdown("# 📄 View and download your meeting transcript")
            transcription_output = gr.Textbox(label="Transcription Review")

            speaker_names = gr.Dataframe(
                label="Match output names to desired names and titles/responsibility. Only enter values for Name and Title",
                headers=["Default", "Name", "Title"],
                # One datatype entry per column (the table has three columns).
                datatype=["str", "str", "str"],
                row_count=(5,"fixed"),
                col_count=(3, "fixed"),
                type="pandas",
                value=default_table(),
            )
            fix_button = gr.Button("Fix and Prepare Download")
            label_file_link = gr.File(label="Download Cleaned Transcript")

        # Process: the first handler only posts a status message, the second
        # runs the actual transcription and fills the review textbox.
        process_button.click(fn=click_message,
                             outputs=output_box)

        process_button.click(
            fn=process_meeting,
            inputs=[audio_input, num_speakers],
            outputs=[transcription_output]
        )

        # Apply the name/title table to the transcript and expose the file.
        fix_button.click(
            fn=substitute_names,
            inputs=[speaker_names, num_speakers, transcription_output],
            outputs=[label_file_link]
        )

    # NOTE(review): allowed_paths=["/"] together with server_name="0.0.0.0"
    # permits serving files from the whole filesystem to anyone who can reach
    # the server -- consider narrowing this to the directories actually needed.
    demo.launch(server_name="0.0.0.0", allowed_paths=["/"])




if __name__ == "__main__":
    # Get config
    conf = get_files.json_cfg()
    
    # Initialize the diarizer; process_meeting reads it as a module-level global
    diarizer = Diarizer(conf)
    
    # Initialize the transcriber; process_meeting also reads it as a global
    transcriber = Transcriber(conf)
    # Build the interface and launch the server
    main(conf)