tykiww's picture
Update app.py
f390e12 verified
import os
import tempfile
import gradio as gr
import pandas as pd
from utilities.setup import get_files
#import spaces
from services.diarization import Diarizer
from services.asr import Transcriber
from speechbox import ASRDiarizationPipeline
#@spaces.GPU
def process_meeting(audio_input, num_speakers):
    """Diarize and transcribe a recording into a speaker-labelled transcript.

    Args:
        audio_input: Path (str) to the audio file to process.
        num_speakers: Number of speakers (int), forwarded to the pipeline.

    Returns:
        A string with one ``"<speaker>: <text>"`` line per segment returned
        by the pipeline, each terminated by a newline.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    # Fail with a readable UI message instead of an opaque pipeline error
    # when the user presses "Process" before providing any audio.
    if not audio_input:
        raise gr.Error("Please upload or record an audio file before processing.")

    print(audio_input)

    # `diarizer` and `transcriber` are module-level objects created in the
    # `__main__` section of this file.
    diarization_pipeline = diarizer.get_pipeline()
    asr_pipeline = transcriber.get_pipeline()

    # speechbox combines both pipelines for prediction and cleaning.
    pipeline = ASRDiarizationPipeline(
        asr_pipeline=asr_pipeline,
        diarization_pipeline=diarization_pipeline,
    )
    output = pipeline(audio_input, num_speakers=num_speakers)

    # One "<speaker>: <text>" line per segment.
    return "".join(
        f"{segment['speaker']}: {segment['text']}\n" for segment in output
    )
def click_message():
    """Return the status text shown once processing has been triggered."""
    status = "Results loading. Go to next page!"
    return status
def default_table():
    """Build the initial speaker-mapping table.

    Returns:
        A ``pandas.DataFrame`` with five rows and the columns ``Default``
        (``SPEAKER_00`` .. ``SPEAKER_04``), ``Name`` and ``Title``; the last
        two columns are filled with empty strings.
    """
    placeholder_labels = [f"SPEAKER_{idx:02d}" for idx in range(5)]
    blank_cells = [""] * len(placeholder_labels)
    columns = {
        "Default": placeholder_labels,
        "Name": list(blank_cells),
        "Title": list(blank_cells),
    }
    return pd.DataFrame(columns)
def tempfile_generator():
    """Create an empty ``.txt`` temporary file and return its path.

    The file is created with ``delete=False``, so it remains on disk after
    the handle is closed and can be reopened by the caller.
    """
    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as handle:
        return handle.name
def substitute_names(speaker_names, num_speakers, text):
    """Replace default speaker labels in a transcript and save it to a file.

    Args:
        speaker_names: ``pandas.DataFrame`` whose three columns hold, in
            order, the default label (e.g. ``SPEAKER_00``), the desired name
            and an optional title.
        num_speakers: Number of speakers; only the first ``num_speakers``
            rows of ``speaker_names`` are applied. ``None`` applies all rows.
        text: Transcript text containing the default labels.

    Returns:
        Path (str) of a UTF-8 ``.txt`` temporary file holding the cleaned
        transcript. The file is not deleted automatically.
    """

    def _cell(value):
        # Non-string cells (None, NaN, numbers) are treated as blank.
        return value.strip() if isinstance(value, str) else ""

    if num_speakers is None:
        row_limit = len(speaker_names)
    else:
        # Clamp at zero: DataFrame.head() gives negative counts a different
        # meaning ("all but the last n rows").
        row_limit = max(int(num_speakers), 0)

    rows = speaker_names.head(row_limit).itertuples(index=False, name=None)
    for default, name, title in rows:
        default, name, title = _cell(default), _cell(name), _cell(title)
        # An empty search string would make str.replace() insert the label
        # between every character; a row with neither name nor title has
        # nothing to substitute.
        if not default or not (name or title):
            continue
        # Keep the default label when only a title was entered.
        label = name or default
        if title:
            label = f"{label} ({title})"
        text = text.replace(default, label)

    # Write the result to a persistent temp file and hand its path back so
    # the caller can offer it for download.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as file:
        file.write(text)
    return file.name
def main(conf):
    """Build the three-tab Gradio interface and launch the server.

    Args:
        conf: Configuration mapping. This function reads
            ``conf["layout"]["page_names"]`` (three tab titles),
            ``conf["layout"]["about"]`` (passed to
            ``get_files.load_markdown_file``) and
            ``conf["session"]["min_speakers"]`` / ``["max_speakers"]``
            (bounds of the speaker-count dropdown).
    """
    with gr.Blocks(theme=gr.themes.Soft(text_size="lg")) as demo:

        # Tab 1: description of the app.
        with gr.TabItem(conf["layout"]["page_names"][0]):
            gr.Markdown("# 🎤 Non-Video Meeting Transcription and Speaker Diarization")
            gr.Markdown("![](file/microphone_pen_and_paper.png)")
            gr.Markdown(get_files.load_markdown_file(conf["layout"]["about"]))

        # Tab 2: audio input and speaker count.
        with gr.TabItem(conf["layout"]["page_names"][1]):
            gr.Markdown("# 🔊 Upload or record your meeting")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            num_speakers = gr.Dropdown(
                list(range(conf["session"]["min_speakers"],
                           conf["session"]["max_speakers"] + 1)),
                label="Number of Speakers",
                value=conf["session"]["min_speakers"],
            )
            process_button = gr.Button("Process")
            output_box = gr.Textbox(label="Progress")

        # Tab 3: transcript review, speaker renaming and download.
        with gr.TabItem(conf["layout"]["page_names"][2]):
            gr.Markdown("# 📄 View and download your meeting transcript")
            transcription_output = gr.Textbox(label="Transcription Review")
            speaker_names = gr.Dataframe(
                label="Match output names to desired names and titles/responsibility. Only enter values for Name and Title",
                headers=["Default", "Name", "Title"],
                # One datatype entry per column (three columns).
                datatype=["str", "str", "str"],
                row_count=(5, "fixed"),
                col_count=(3, "fixed"),
                type="pandas",
                value=default_table(),
            )
            fix_button = gr.Button("Fix and Prepare Download")
            label_file_link = gr.File(label="Download Cleaned Transcript")

        # Two handlers on the same button: the first posts a status message,
        # the second runs the transcription.
        process_button.click(fn=click_message,
                             outputs=output_box)
        process_button.click(
            fn=process_meeting,
            inputs=[audio_input, num_speakers],
            outputs=[transcription_output],
        )

        fix_button.click(
            fn=substitute_names,
            inputs=[speaker_names, num_speakers, transcription_output],
            outputs=[label_file_link],
        )

    # The server listens on all interfaces, so only expose the directories
    # the app actually serves from instead of the whole filesystem ("/").
    # NOTE(review): assumes the page image lives under the working directory
    # and generated transcripts under the system temp directory - confirm.
    demo.launch(
        server_name="0.0.0.0",
        allowed_paths=[os.getcwd(), tempfile.gettempdir()],
    )
if __name__ == "__main__":
    # Load the configuration used by both the models and the UI.
    conf = get_files.json_cfg()

    # These two names must stay module-level: process_meeting() looks up
    # `diarizer` and `transcriber` as globals. The diarizer is constructed
    # first, then the transcriber.
    diarizer, transcriber = Diarizer(conf), Transcriber(conf)

    # Build the interface and start serving.
    main(conf)