Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,7 @@
|
|
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
from utilities.setup import get_files
|
3 |
#import spaces
|
4 |
from services.diarization import Diarizer
|
@@ -7,7 +10,7 @@ from speechbox import ASRDiarizationPipeline
|
|
7 |
|
8 |
|
9 |
#@spaces.GPU
|
10 |
-
def process_meeting(audio_input, num_speakers
|
11 |
"""
|
12 |
audio_input: filepath --> str
|
13 |
num_speakers: number --> int
|
@@ -15,22 +18,54 @@ def process_meeting(audio_input, num_speakers, speaker_names):
|
|
15 |
"""
|
16 |
|
17 |
print(audio_input)
|
18 |
-
#
|
19 |
-
#diarization_result, label_file = diarizer.run(audio_input, num_speakers)
|
20 |
diarization_pipeline = diarizer.get_pipeline()
|
21 |
-
# Next, pass it through a transcription stage
|
22 |
asr_pipeline = transcriber.get_pipeline()
|
23 |
-
|
|
|
24 |
pipeline = ASRDiarizationPipeline(
|
25 |
asr_pipeline=asr_pipeline,
|
26 |
diarization_pipeline=diarization_pipeline)
|
27 |
-
|
28 |
-
# Finally, Clean up the docs.
|
29 |
output = pipeline(audio_input,
|
30 |
num_speakers = num_speakers)
|
31 |
-
print(output)
|
32 |
-
return "a", None #diarization_result, label_file
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
|
36 |
def main(conf):
|
@@ -51,30 +86,41 @@ def main(conf):
|
|
51 |
conf["session"]["max_speakers"])),
|
52 |
label="Number of Speakers",
|
53 |
value=conf["session"]["min_speakers"])
|
54 |
-
speaker_names = gr.Dataframe(
|
55 |
-
label="Type your names and details. Your actual entries will be limited to the speakers you selected above.",
|
56 |
-
headers=["Name", "Supporting Details"],
|
57 |
-
datatype=["str", "str"],
|
58 |
-
row_count=(5,"fixed"),
|
59 |
-
col_count=(2, "fixed"),
|
60 |
-
type="pandas"
|
61 |
-
)
|
62 |
|
63 |
process_button = gr.Button("Process")
|
|
|
64 |
|
65 |
|
66 |
with gr.TabItem(conf["layout"]["page_names"][2]):
|
67 |
gr.Markdown("# π View and download your meeting transcript")
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
# Process
|
|
|
|
|
73 |
|
74 |
process_button.click(
|
75 |
fn=process_meeting,
|
76 |
-
inputs=[audio_input, num_speakers
|
77 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
)
|
79 |
|
80 |
demo.launch(server_name="0.0.0.0", allowed_paths=["/"])
|
@@ -85,7 +131,10 @@ def main(conf):
|
|
85 |
if __name__ == "__main__":
|
86 |
# get config
|
87 |
conf = get_files.json_cfg()
|
|
|
88 |
# initialize diarizer
|
89 |
diarizer = Diarizer(conf)
|
|
|
90 |
transcriber = Transcriber(conf)
|
|
|
91 |
main(conf)
|
|
|
1 |
+
import os
|
2 |
import gradio as gr
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
from utilities.setup import get_files
|
6 |
#import spaces
|
7 |
from services.diarization import Diarizer
|
|
|
10 |
|
11 |
|
12 |
#@spaces.GPU
|
13 |
+
def process_meeting(audio_input, num_speakers):
    """Diarize and transcribe a recording into a speaker-labelled transcript.

    Args:
        audio_input: filepath --> str
        num_speakers: number --> int

    Returns:
        str: one "<speaker>: <text>" line per segment produced by the
        combined pipeline, each terminated by a newline. Empty string when
        the pipeline yields no segments.
    """
    print(audio_input)

    # Get diarization and transcription pipelines. `diarizer` and
    # `transcriber` are module-level objects created in the __main__ guard.
    diarization_pipeline = diarizer.get_pipeline()
    asr_pipeline = transcriber.get_pipeline()

    # Pass it into speechbox for prediction and cleaning
    pipeline = ASRDiarizationPipeline(
        asr_pipeline=asr_pipeline,
        diarization_pipeline=diarization_pipeline)

    output = pipeline(audio_input,
                      num_speakers=num_speakers)

    # Render every segment as "<speaker>: <text>\n" and concatenate once
    # instead of growing the string inside an index loop.
    return "".join(
        f"{segment['speaker']}: {segment['text']}\n" for segment in output
    )
|
41 |
+
|
42 |
+
|
43 |
+
def click_message():
    """Return the status text shown right after the Process button is clicked."""
    return "Results loading. Go to next page!"
|
45 |
+
|
46 |
+
|
47 |
+
def default_table(num_speakers=5):
    """Build the initial speaker-mapping table shown in the names Dataframe.

    Args:
        num_speakers: number of rows to generate. Defaults to 5, which
            reproduces the original fixed table (SPEAKER_00 .. SPEAKER_04).

    Returns:
        pandas.DataFrame with columns "Default", "Name" and "Title". "Default"
        holds the zero-padded SPEAKER_NN labels; "Name" and "Title" are
        empty strings for the user to fill in.
    """
    return pd.DataFrame({
        "Default": [f"SPEAKER_{index:02d}" for index in range(num_speakers)],
        "Name": [""] * num_speakers,
        "Title": [""] * num_speakers,
    })
|
53 |
+
|
54 |
+
def substitue_names(speaker_names, text):
    """Replace default speaker labels in a transcript and write it to a file.

    Args:
        speaker_names: pandas DataFrame whose rows are
            (default label, name, title), in that column order.
        text: transcript text containing the default labels.

    Returns:
        str: path of the text file the cleaned transcript was written to.
    """

    def _cell(value):
        # Empty grid cells may arrive as None/NaN rather than ""; treat
        # anything that is not a string as "not provided".
        return value.strip() if isinstance(value, str) else ""

    for default, name, title in speaker_names.itertuples(index=False):
        name = _cell(name)
        title = _cell(title)

        # Rows the user left untouched must keep their default label;
        # replacing with an empty string would erase the speaker entirely.
        if not name and not title:
            continue
        if not isinstance(default, str) or not default:
            continue

        label = name or default
        # `!=`/truthiness instead of the Python 2 only `<>` operator.
        if title:
            label += " (" + title + ")"

        text = text.replace(default, label)

    # NOTE(review): fixed file name in the working directory is shared by
    # all sessions; concurrent users could overwrite each other's output.
    temp_file = "transcript.txt"
    with open(temp_file, "w", encoding="utf-8") as file:
        file.write(text)

    return temp_file
|
69 |
|
70 |
|
71 |
def main(conf):
|
|
|
86 |
conf["session"]["max_speakers"])),
|
87 |
label="Number of Speakers",
|
88 |
value=conf["session"]["min_speakers"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
process_button = gr.Button("Process")
|
91 |
+
output_box = gr.Textbox()
|
92 |
|
93 |
|
94 |
with gr.TabItem(conf["layout"]["page_names"][2]):
|
95 |
gr.Markdown("# π View and download your meeting transcript")
|
96 |
+
transcription_output = gr.Textbox(label="Transcription Review")
|
97 |
+
|
98 |
+
speaker_names = gr.Dataframe(
|
99 |
+
label="Match output names to desired names and titles/responsibility. Only enter values for ",
|
100 |
+
headers=["Default", "Name", "Title"],
|
101 |
+
datatype=["str", "str"],
|
102 |
+
row_count=(5,"fixed"),
|
103 |
+
col_count=(3, "fixed"),
|
104 |
+
type="pandas",
|
105 |
+
value=default_table(),
|
106 |
+
)
|
107 |
+
fix_button = gr.Button("Fix and Prepare Download")
|
108 |
+
label_file_link = gr.File(label="Download Cleaned Transcription")
|
109 |
|
110 |
# Process
|
111 |
+
process_button.click(fn=click_message,
|
112 |
+
outputs=output_box)
|
113 |
|
114 |
process_button.click(
|
115 |
fn=process_meeting,
|
116 |
+
inputs=[audio_input, num_speakers],
|
117 |
+
outputs=[transcription_output]
|
118 |
+
)
|
119 |
+
|
120 |
+
fix_button.click(
|
121 |
+
fn=substitue_names,
|
122 |
+
inputs=[speaker_names, transcription_output],
|
123 |
+
outputs=[label_file_link]
|
124 |
)
|
125 |
|
126 |
demo.launch(server_name="0.0.0.0", allowed_paths=["/"])
|
|
|
131 |
if __name__ == "__main__":
    # get config
    conf = get_files.json_cfg()

    # initialize diarizer (read as a module-level global by process_meeting)
    diarizer = Diarizer(conf)

    # initialize transcriber (read as a module-level global by process_meeting)
    transcriber = Transcriber(conf)

    # run main
    main(conf)
|