Spaces:

hf-audio
/

open_asr_leaderboard

Running on CPU Upgrade

App Files Files Community

open_asr_leaderboard / app.py

reach-vb HF staff

Drop common voice and update rtfx (#17)

2deac9d verified 3 months ago

raw

history blame

5.48 kB

	import json
	import logging
	from datetime import datetime, timezone

	import gradio as gr
	import pandas as pd

	from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS
	from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
	from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message

	# Date string interpolated into the "Last updated on ..." footer of the page.
	LAST_UPDATED = "Aug 12th 2024"

	# Maps column headers of the results CSV to the labels shown in the
	# leaderboard table; applied with DataFrame.rename once the cells have been
	# formatted.
	# NOTE(review): the key below is "MODEL" while the formatting loop tests for a
	# column called "model" -- confirm which casing the CSV really uses.
	column_names = {
	    "MODEL": "Model",
	    "Avg. WER": "Average WER ⬇️",
	    "Avg. RTFx": "RTFx ⬆️️",
	    "AMI WER": "AMI",
	    "Earnings22 WER": "Earnings22",
	    "Gigaspeech WER": "Gigaspeech",
	    "LS Clean WER": "LS Clean",
	    "LS Other WER": "LS Other",
	    "SPGISpeech WER": "SPGISpeech",
	    "Tedlium WER": "Tedlium",
	    "Voxpopuli WER": "Voxpopuli",
	}

	# `requested_models` is consulted and extended by request_model() to reject
	# duplicate requests; `csv_results` is the path of the results CSV.
	# (eval_queue_repo is not used in this file -- presumably a handle on the
	# request-queue repository; verify in init.py.)
	eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()

	# Fail fast with the specific built-in error rather than a bare Exception.
	if not csv_results.exists():
	    raise FileNotFoundError(f"CSV file {csv_results} does not exist locally")

	# Get csv with data and parse columns
	original_df = pd.read_csv(csv_results)

	# Formats the columns
	def formatter(x):
	    """Return a leaderboard cell value ready for display.

	    Strings (including ``str`` subclasses) are returned unchanged; any other
	    value is rounded to two decimal places with the built-in ``round``.

	    Raises:
	        TypeError: if ``x`` is neither a string nor something ``round``
	            accepts (for example ``None``).
	    """
	    # isinstance() rather than an exact type check, so that str subclasses
	    # are not sent to round(), which would raise TypeError.
	    if isinstance(x, str):
	        return x
	    return round(x, 2)

	# Prepare every column for display: the "model" column is passed through
	# make_clickable_model, every other column through formatter().
	for col in original_df.columns:
	    if col == "model":
	        # Apply the helper directly; the previous `x.replace(x, helper(x))`
	        # produced the same string in a roundabout way.
	        original_df[col] = original_df[col].apply(make_clickable_model)
	    else:
	        original_df[col] = original_df[col].apply(formatter)  # For numerical values

	# Switch to the display labels, then order rows by average WER (ascending is
	# the sort_values default). The sort key is the renamed label, so the rename
	# has to happen first.
	original_df.rename(columns=column_names, inplace=True)
	original_df.sort_values(by='Average WER ⬇️', inplace=True)

	# Column names and datatypes declared on AutoEvalColumn; TYPES is handed to
	# the leaderboard Dataframe component as its `datatype`.
	COLS = [c.name for c in fields(AutoEvalColumn)]
	TYPES = [c.type for c in fields(AutoEvalColumn)]


	def request_model(model_text, chbcoco2017):
	    """Submit an evaluation request for a model and report the outcome.

	    The request is serialised as JSON into a temporary ``.txt`` file under
	    ``DIR_OUTPUT_REQUESTS``, handed to ``upload_file`` and recorded in
	    ``requested_models`` so that a second identical request is rejected.

	    Args:
	        model_text: Model identifier typed by the user. Surrounding
	            whitespace is ignored.
	        chbcoco2017: State of the dataset checkbox; when truthy the
	            "ESB Datasets tests only" selection is requested.

	    Returns:
	        The value produced by ``styled_error`` when the request is rejected
	        or fails, otherwise the value produced by ``styled_message``. The UI
	        renders it in a Markdown component.
	    """
	    # Tolerate stray whitespace around a pasted model name (and a missing value).
	    model_text = (model_text or "").strip()

	    # Determine the selected checkboxes
	    dataset_selection = []
	    if chbcoco2017:
	        dataset_selection.append("ESB Datasets tests only")

	    if not dataset_selection:
	        return styled_error("You need to select at least one dataset")

	    base_model_on_hub, error_msg = is_model_on_hub(model_text)
	    if not base_model_on_hub:
	        return styled_error(f"Base model '{model_text}' {error_msg}")

	    # Construct the output dictionary
	    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
	    eval_entry = {
	        "date": current_time,
	        "model": model_text,
	        "datasets_selected": ', '.join(dataset_selection),
	    }

	    # Prepare file path
	    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)

	    # "/" cannot appear in a file name, so the model id is flattened with "@".
	    fn_datasets = '@ '.join(dataset_selection)
	    filename = model_text.replace("/", "@") + "@@" + fn_datasets
	    if filename in requested_models:
	        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")

	    out_filepath = DIR_OUTPUT_REQUESTS / (filename + ".txt")
	    log = logging.getLogger(__name__)
	    try:
	        # Write the results to a text file
	        with open(out_filepath, "w", encoding="utf-8") as f:
	            f.write(json.dumps(eval_entry))

	        upload_file(filename, out_filepath)
	    except Exception:
	        # Keep the user-facing message generic, but leave a traceback in the
	        # logs instead of silently discarding the cause.
	        log.exception("Failed to submit evaluation request %r", filename)
	        return styled_error("Error submitting request!")
	    finally:
	        # Remove the local file whether or not the upload succeeded, so that
	        # failed attempts do not leave stale request files behind.
	        try:
	            if out_filepath.exists():
	                out_filepath.unlink()
	        except OSError:
	            log.warning("Could not remove temporary request file %s", out_filepath)

	    # Include file in the list of uploaded files
	    requested_models.append(filename)

	    # NOTE(review): the trailing "</p>" has no opening tag inside this literal;
	    # confirm whether styled_message already wraps its argument in <p>...</p>.
	    return styled_message("🤗 Your request has been submitted and will be evaluated soon!</p>")

	# ---------------------------------------------------------------------------
	# Page layout: banner + intro, three tabs (leaderboard / metrics / request
	# form), a "last updated" footer and a collapsible citation box.
	# ---------------------------------------------------------------------------
	with gr.Blocks() as demo:
	    gr.HTML(BANNER, elem_id="banner")
	    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	    with gr.Tabs(elem_classes="tab-buttons") as tabs:
	        # Tab 0: read-only table holding the formatted results dataframe.
	        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
	            leaderboard_table = gr.components.Dataframe(
	                value=original_df,
	                datatype=TYPES,
	                elem_id="leaderboard-table",
	                interactive=False,
	                visible=True,
	            )

	        # Tab 1: static text describing the metrics.
	        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
	            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

	        # Tab 2: form used to request the evaluation of a new model.
	        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
	            with gr.Column():
	                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
	            with gr.Column():
	                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
	            with gr.Column():
	                model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
	                # Hidden, non-interactive and pre-checked: request_model always
	                # receives True for this input.
	                chb_coco2017 = gr.Checkbox(
	                    label="COCO validation 2017 dataset",
	                    visible=False,
	                    value=True,
	                    interactive=False,
	                )
	            with gr.Column():
	                # Receives whatever request_model returns.
	                mdw_submission_result = gr.Markdown()
	                btn_submitt = gr.Button(value="🚀 Request")
	                btn_submitt.click(
	                    fn=request_model,
	                    inputs=[model_name_textbox, chb_coco2017],
	                    outputs=mdw_submission_result,
	                )

	    gr.Markdown(f"Last updated on {LAST_UPDATED}", elem_classes="markdown-text")

	    with gr.Row():
	        with gr.Accordion("📙 Citation", open=False):
	            gr.Textbox(
	                value=CITATION_TEXT,
	                lines=7,
	                label="Copy the BibTeX snippet to cite this source",
	                elem_id="citation-button",
	                show_copy_button=True,
	            )

	# Start the app as soon as the module is executed.
	demo.launch()