Spaces:

Kuuhaakuu
/

Benchmark_SeamlessM4T_v2_Large_Vs_Whisper_Darija

Sleeping

App Files Files Community

Kuuhaakuu committed on Mar 19

Commit

ef0b4df

•

1 Parent(s): eb89e16

Create app.py

Browse files

Files changed (1) hide show

app.py +117 -0

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+import gradio as gr
+import json
+import os
+import random
+import matplotlib.pyplot as plt
+# Load transcription results
+with open("./drive/MyDrive/results/gradio_results.json", "r", encoding='utf-8') as file:
+    gradio_transcriptions = json.load(file)
+with open("./drive/MyDrive/results/openai_results.json", "r", encoding='utf-8') as file:
+    openai_transcriptions = json.load(file)
+audio_files_directory = "./drive/MyDrive/chunks"
+def get_random_audio_and_transcriptions():
+    random_choice = random.choice(os.listdir(audio_files_directory))
+    audio_path = os.path.join(audio_files_directory, random_choice)
+    base_name = os.path.splitext(random_choice)[0]
+    gradio_transcription = next((t for t in gradio_transcriptions if t['chunk'].startswith(base_name)), {'text': ''})['text']
+    openai_transcription = next((t for t in openai_transcriptions if t['chunk'].startswith(base_name)), {'text': ''})['text']
+    return audio_path, gradio_transcription, openai_transcription
+def handle_vote(vote, audio_path, gradio_transcription, openai_transcription):
+    votes_file = "./drive/MyDrive/results/votes.json"
+    # Ensure vote key is in lowercase to match dictionary keys
+    vote = vote.lower()
+    if os.path.exists(votes_file):
+        with open(votes_file, "r", encoding='utf-8') as file:
+            votes = json.load(file)
+    else:
+        votes = {}
+    key = os.path.basename(audio_path)
+    if key not in votes:
+        votes[key] = {"seamlessm4t": 0, "whisper": 0, "tie": 0}
+    if vote in votes[key]:
+        votes[key][vote] += 1
+    else:
+        print(f"Invalid vote option: {vote}. Valid options are 'gradio', 'openai', and 'tie'.")
+    with open(votes_file, "w", encoding='utf-8') as file:
+        json.dump(votes, file, indent=4)
+def calculate_vote_totals():
+    votes_file = "./drive/MyDrive/results/votes.json"
+    if os.path.exists(votes_file):
+        with open(votes_file, "r", encoding='utf-8') as file:
+            votes_data = json.load(file)
+    else:
+        print("No votes have been recorded yet.")
+        return None
+    # Initialize totals
+    totals = {"seamlessm4t": 0, "whisper": 0, "tie": 0}
+    # Aggregate votes
+    for _, vote_counts in votes_data.items():
+        for key in totals:
+            totals[key] += vote_counts.get(key, 0)
+    return totals
+def show_results():
+    totals = calculate_vote_totals()
+    if totals:
+        # Create a bar graph
+        labels = list(["SeamlessM4T", "Whisper", "Tie"])
+        values = list(totals.values())
+        plt.figure(figsize=(8, 6))
+        plt.bar(labels, values, color=['cornflowerblue', 'lavender', 'red'])
+        plt.xlabel('Models')
+        plt.ylabel('Votes')
+        plt.title('Vote Distribution')
+        plt.xticks(labels)
+        plt.ylim(0, max(values) + 1)  # Set y-axis limit to make the graph aesthetically pleasing
+        return plt
+    else:
+        # Return an empty plot if no votes are found
+        plt.figure(figsize=(8, 6))
+        return plt
+def setup_interface():
+    with gr.Blocks() as demo:
+        vote_options = gr.Radio(choices=["SeamlessM4T", "Whisper", "Tie"], label="Vote")
+        submit_button = gr.Button("Submit Vote")
+        gradio_transcription = gr.Textbox(label="SeamlessM4T-V2-large Transcription", interactive=False)
+        openai_transcription = gr.Textbox(label="OpenAI Whisper Transcription", interactive=False)
+        audio_player = gr.Audio(label="Listen to the Audio", interactive=False)
+        def submit_vote(vote):
+            audio_path, gr_transcription, oa_transcription = get_random_audio_and_transcriptions()
+            if vote:  # Ensure a vote was made
+                handle_vote(vote, audio_path, gr_transcription, oa_transcription)
+            # Return new data to update the UI components
+            return gr_transcription, oa_transcription, audio_path
+        submit_button.click(submit_vote, inputs=[vote_options], outputs=[gradio_transcription, openai_transcription, audio_player])
+        show_results_button = gr.Button("Show Results")
+        results_plot = gr.Plot()
+        show_results_button.click(show_results, inputs=[], outputs=results_plot)
+        # Initialize with data
+        initial_data = get_random_audio_and_transcriptions()
+        gradio_transcription.value = initial_data[1]
+        openai_transcription.value = initial_data[2]
+        audio_player.value = initial_data[0]
+    return demo
+# Build the interface once at import time and bind it to the module-level name `demo`.
+demo = setup_interface()
+# Start the Gradio server; debug=True is passed straight through to launch().
+demo.launch(debug=True)