import os
import tempfile

import gradio as gr
import pandas as pd

from utilities.setup import get_files

#import spaces
from services.diarization import Diarizer
from services.asr import Transcriber
from speechbox import ASRDiarizationPipeline


#@spaces.GPU
def process_meeting(audio_input, num_speakers):
    """Diarize and transcribe a recording into a speaker-labelled transcript.

    Args:
        audio_input: Path to the audio file (str), as delivered by the
            ``gr.Audio(type="filepath")`` component.
        num_speakers: Number of speakers (int) forwarded to the pipeline.

    Returns:
        A string with one ``"<speaker>: <text>"`` line per segment returned
        by the pipeline, each terminated by a newline.

    Raises:
        gr.Error: If no audio file was supplied.
    """
    if not audio_input:
        # Surface a readable message in the UI instead of an opaque failure
        # deep inside the pipeline.
        raise gr.Error("Please upload or record an audio file first.")

    print(audio_input)

    # Get diarization and transcription pipelines. `diarizer` and
    # `transcriber` are module globals created in the __main__ block below.
    diarization_pipeline = diarizer.get_pipeline()
    asr_pipeline = transcriber.get_pipeline()

    # Pass it into speechbox for prediction and cleaning
    pipeline = ASRDiarizationPipeline(
        asr_pipeline=asr_pipeline,
        diarization_pipeline=diarization_pipeline,
    )
    output = pipeline(audio_input, num_speakers=num_speakers)

    # One "<speaker>: <words>" line per segment.
    return "".join(
        f"{segment['speaker']}: {segment['text']}\n" for segment in output
    )


def click_message():
    """Return the status message shown right after "Process" is clicked."""
    return "Results loading. Go to next page!"


def default_table():
    """Return the initial speaker-mapping table.

    The table has five rows with the default diarization labels in the
    ``Default`` column and empty ``Name`` / ``Title`` columns for the user
    to fill in.
    """
    return pd.DataFrame({
        "Default": ["SPEAKER_00", "SPEAKER_01", "SPEAKER_02", "SPEAKER_03", "SPEAKER_04"],
        "Name": ["", "", "", "", ""],
        "Title": ["", "", "", "", ""],
    })


def tempfile_generator():
    """Create an empty, persistent ``.txt`` temp file and return its path."""
    # delete=False keeps the file on disk after it is closed so that it can
    # be reopened for writing and later offered for download.
    with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as temp_file:
        return temp_file.name


def _clean_cell(value):
    """Return a table cell as a stripped string; None/NaN become ""."""
    if value is None or (isinstance(value, float) and pd.isna(value)):
        return ""
    return str(value).strip()


def substitute_names(speaker_names, num_speakers, text):
    """Replace default speaker labels in a transcript and save it to a file.

    Args:
        speaker_names: pandas DataFrame with three columns (default label,
            name, title), as produced by the speaker-mapping table.
        num_speakers: Number of speakers; only the first ``num_speakers``
            rows of ``speaker_names`` are applied.
        text: Transcript text containing the default speaker labels.

    Returns:
        Path (str) of a temporary ``.txt`` file holding the cleaned
        transcript, suitable as the value of a ``gr.File`` output.
    """
    text = text or ""

    # Clean Speaker names.
    # itertuples() returns an iterator, which cannot be sliced, so the row
    # limit is applied on the DataFrame itself. All `num_speakers` rows are
    # used (the previous `num_speakers-1` bound dropped the last speaker).
    rows = speaker_names.head(int(num_speakers)).itertuples(index=False, name=None)
    for default, name, title in rows:
        default = _clean_cell(default)
        if not default:
            # str.replace("", x) would insert x between every character.
            continue

        # Keep the default label when no name was entered, rather than
        # blanking the speaker out of the transcript.
        label = _clean_cell(name) or default
        title = _clean_cell(title)
        if title:
            label = f"{label} ({title})"
        text = text.replace(default, label)

    # Make file downloadable
    temp_file_name = tempfile_generator()
    with open(temp_file_name, "w", encoding="utf-8") as file:
        file.write(text)
    return temp_file_name


def main(conf):
    """Build the three-tab Gradio interface and launch the server.

    Args:
        conf: Configuration mapping. This function reads
            ``conf["layout"]["page_names"]`` (three tab titles),
            ``conf["layout"]["about"]`` (passed to
            ``get_files.load_markdown_file``), and
            ``conf["session"]["min_speakers"]`` /
            ``conf["session"]["max_speakers"]`` (inclusive dropdown range).
    """
    with gr.Blocks(theme=gr.themes.Soft(text_size="lg")) as demo:

        # Tab 1: about page
        with gr.TabItem(conf["layout"]["page_names"][0]):
            gr.Markdown("# 🎤 Non-Video Meeting Transcription and Speaker Diarization")
            gr.Markdown("![](file/microphone_pen_and_paper.png)")
            gr.Markdown(get_files.load_markdown_file(conf["layout"]["about"]))

        # Tab 2: audio upload and processing
        with gr.TabItem(conf["layout"]["page_names"][1]):
            gr.Markdown("# 🔊 Upload or record your meeting")
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            num_speakers = gr.Dropdown(
                list(range(conf["session"]["min_speakers"],
                           conf["session"]["max_speakers"] + 1)),
                label="Number of Speakers",
                value=conf["session"]["min_speakers"],
            )
            process_button = gr.Button("Process")
            output_box = gr.Textbox(label="Progress")

        # Tab 3: transcript review, renaming and download
        with gr.TabItem(conf["layout"]["page_names"][2]):
            gr.Markdown("# 📄 View and download your meeting transcript")
            transcription_output = gr.Textbox(label="Transcription Review")
            speaker_names = gr.Dataframe(
                label="Match output names to desired names and titles/responsibility. Only enter values for Name and Title",
                headers=["Default", "Name", "Title"],
                # One datatype entry per column (three columns).
                datatype=["str", "str", "str"],
                row_count=(5, "fixed"),
                col_count=(3, "fixed"),
                type="pandas",
                value=default_table(),
            )
            fix_button = gr.Button("Fix and Prepare Download")
            label_file_link = gr.File(label="Download Cleaned Transcript")

        # Process
        # Two handlers on the same button: one posts the status message,
        # the other runs the transcription.
        process_button.click(fn=click_message, outputs=output_box)
        process_button.click(
            fn=process_meeting,
            inputs=[audio_input, num_speakers],
            outputs=[transcription_output],
        )

        fix_button.click(
            fn=substitute_names,
            inputs=[speaker_names, num_speakers, transcription_output],
            outputs=[label_file_link],
        )

    # NOTE(review): allowed_paths=["/"] permits Gradio to serve any file the
    # process can read; consider narrowing it to the directories actually
    # needed (image assets, temp dir) before exposing this publicly.
    demo.launch(server_name="0.0.0.0", allowed_paths=["/"])


if __name__ == "__main__":
    # get config
    conf = get_files.json_cfg()

    # initialize diarizer
    diarizer = Diarizer(conf)
    transcriber = Transcriber(conf)

    # run main
    main(conf)