ZeroCommand
/

test-giskard-evaluator

Model card Files Files and versions Community

test-giskard-evaluator / app_legacy.py

ZeroCommand

Upload folder using huggingface_hub

433de9b verified 9 months ago

raw

history blame contribute delete

18.6 kB

	import json
	import logging
	import os
	import subprocess
	import time

	import datasets
	import gradio as gr
	import huggingface_hub
	from transformers.pipelines import TextClassificationPipeline

	from io_utils import (
	convert_column_mapping_to_json,
	read_inference_type,
	read_scanners,
	write_inference_type,
	write_scanners,
	)
	from text_classification import (
	check_column_mapping_keys_validity,
	text_classification_fix_column_mapping,
	)
	from wordings import CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_MD

	HF_REPO_ID = "HF_REPO_ID"
	HF_SPACE_ID = "SPACE_ID"
	HF_WRITE_TOKEN = "HF_WRITE_TOKEN"


	def check_model(model_id):
	try:
	task = huggingface_hub.model_info(model_id).pipeline_tag
	except Exception:
	return None, None

	try:
	from transformers import pipeline

	ppl = pipeline(task=task, model=model_id)

	return model_id, ppl
	except Exception as e:
	return model_id, e


	def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
	try:
	configs = datasets.get_dataset_config_names(dataset_id)
	except Exception:
	# Dataset may not exist
	return None, dataset_config, dataset_split

	if dataset_config not in configs:
	# Need to choose dataset subset (config)
	return dataset_id, configs, dataset_split

	ds = datasets.load_dataset(dataset_id, dataset_config)

	if isinstance(ds, datasets.DatasetDict):
	# Need to choose dataset split
	if dataset_split not in ds.keys():
	return dataset_id, None, list(ds.keys())
	elif not isinstance(ds, datasets.Dataset):
	# Unknown type
	return dataset_id, None, None
	return dataset_id, dataset_config, dataset_split


	def try_validate(
	m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping="{}"
	):
	# Validate model
	if m_id is None:
	gr.Warning(
	"Model is not accessible. Please set your HF_TOKEN if it is a private model."
	)
	return (
	gr.update(interactive=False), # Submit button
	gr.update(visible=True), # Loading row
	gr.update(visible=False), # Preview row
	gr.update(visible=False), # Model prediction input
	gr.update(visible=False), # Model prediction preview
	gr.update(visible=False), # Label mapping preview
	gr.update(visible=False), # feature mapping preview
	)
	if isinstance(ppl, Exception):
	gr.Warning(f'Failed to load model": {ppl}')
	return (
	gr.update(interactive=False), # Submit button
	gr.update(visible=True), # Loading row
	gr.update(visible=False), # Preview row
	gr.update(visible=False), # Model prediction input
	gr.update(visible=False), # Model prediction preview
	gr.update(visible=False), # Label mapping preview
	gr.update(visible=False), # feature mapping preview
	)

	# Validate dataset
	d_id, config, split = check_dataset(
	dataset_id=dataset_id,
	dataset_config=dataset_config,
	dataset_split=dataset_split,
	)

	dataset_ok = False
	if d_id is None:
	gr.Warning(
	f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.'
	)
	elif isinstance(config, list):
	gr.Warning(
	f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.'
	)
	config = gr.update(choices=config, value=config[0])
	elif isinstance(split, list):
	gr.Warning(
	f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.'
	)
	split = gr.update(choices=split, value=split[0])
	else:
	dataset_ok = True

	if not dataset_ok:
	return (
	gr.update(interactive=False), # Submit button
	gr.update(visible=True), # Loading row
	gr.update(visible=False), # Preview row
	gr.update(visible=False), # Model prediction input
	gr.update(visible=False), # Model prediction preview
	gr.update(visible=False), # Label mapping preview
	gr.update(visible=False), # feature mapping preview
	)

	# TODO: Validate column mapping by running once
	prediction_result = None
	id2label_df = None
	if isinstance(ppl, TextClassificationPipeline):
	try:
	column_mapping = json.loads(column_mapping)
	except Exception:
	column_mapping = {}

	(
	column_mapping,
	prediction_input,
	prediction_result,
	id2label_df,
	feature_df,
	) = text_classification_fix_column_mapping(
	column_mapping, ppl, d_id, config, split
	)

	column_mapping = json.dumps(column_mapping, indent=2)

	if prediction_result is None and id2label_df is not None:
	gr.Warning(
	'The model failed to predict with the first row in the dataset. Please provide feature mappings in "Advance" settings.'
	)
	return (
	gr.update(interactive=False), # Submit button
	gr.update(visible=False), # Loading row
	gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True), # Preview row
	gr.update(
	value=f"Sample Input: {prediction_input}", visible=True
	), # Model prediction input
	gr.update(visible=False), # Model prediction preview
	gr.update(
	value=id2label_df, visible=True, interactive=True
	), # Label mapping preview
	gr.update(
	value=feature_df, visible=True, interactive=True
	), # feature mapping preview
	)
	elif id2label_df is None:
	gr.Warning(
	'The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.'
	)
	return (
	gr.update(interactive=False), # Submit button
	gr.update(visible=False), # Loading row
	gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True), # Preview row
	gr.update(
	value=f"Sample Input: {prediction_input}", visible=True
	), # Model prediction input
	gr.update(
	value=prediction_result, visible=True
	), # Model prediction preview
	gr.update(visible=True, interactive=True), # Label mapping preview
	gr.update(visible=True, interactive=True), # feature mapping preview
	)

	gr.Info(
	"Model and dataset validations passed. Your can submit the evaluation task."
	)

	return (
	gr.update(interactive=True), # Submit button
	gr.update(visible=False), # Loading row
	gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True), # Preview row
	gr.update(
	value=f"Sample Input: {prediction_input}", visible=True
	), # Model prediction input
	gr.update(value=prediction_result, visible=True), # Model prediction preview
	gr.update(
	value=id2label_df, visible=True, interactive=True
	), # Label mapping preview
	gr.update(
	value=feature_df, visible=True, interactive=True
	), # feature mapping preview
	)


	def try_submit(
	m_id,
	d_id,
	config,
	split,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	local,
	):
	label_mapping = {}
	for i, label in id2label_mapping_dataframe["Model Prediction Labels"].items():
	label_mapping.update({str(i): label})

	feature_mapping = {}
	for i, feature in feature_mapping_dataframe["Dataset Features"].items():
	feature_mapping.update(
	{feature_mapping_dataframe["Model Input Features"][i]: feature}
	)

	# TODO: Set column mapping for some dataset such as `amazon_polarity`

	if local:
	command = [
	"giskard_scanner",
	"--loader",
	"huggingface",
	"--model",
	m_id,
	"--dataset",
	d_id,
	"--dataset_config",
	config,
	"--dataset_split",
	split,
	"--hf_token",
	os.environ.get(HF_WRITE_TOKEN),
	"--discussion_repo",
	os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
	"--output_format",
	"markdown",
	"--output_portal",
	"huggingface",
	"--feature_mapping",
	json.dumps(feature_mapping),
	"--label_mapping",
	json.dumps(label_mapping),
	"--scan_config",
	"../config.yaml",
	]

	eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
	start = time.time()
	logging.info(f"Start local evaluation on {eval_str}")

	evaluator = subprocess.Popen(
	command,
	stderr=subprocess.STDOUT,
	)
	result = evaluator.wait()

	logging.info(
	f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s"
	)

	gr.Info(
	f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s"
	)
	else:
	gr.Info("TODO: Submit task to an endpoint")

	return gr.update(interactive=True) # Submit button


	def get_demo():
	# gr.themes.Soft(
	# primary_hue="green",
	# )

	def check_dataset_and_get_config(dataset_id):
	try:
	configs = datasets.get_dataset_config_names(dataset_id)
	return gr.Dropdown(configs, value=configs[0], visible=True)
	except Exception:
	# Dataset may not exist
	pass

	def check_dataset_and_get_split(dataset_config, dataset_id):
	try:
	splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
	return gr.Dropdown(splits, value=splits[0], visible=True)
	except Exception as e:
	# Dataset may not exist
	gr.Warning(
	f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
	)

	def clear_column_mapping_tables():
	return [
	gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
	gr.update(value=[], visible=False, interactive=True),
	gr.update(value=[], visible=False, interactive=True),
	]

	def gate_validate_btn(
	model_id,
	dataset_id,
	dataset_config,
	dataset_split,
	id2label_mapping_dataframe=None,
	feature_mapping_dataframe=None,
	):
	column_mapping = "{}"
	_, ppl = check_model(model_id=model_id)

	if id2label_mapping_dataframe is not None:
	labels = convert_column_mapping_to_json(
	id2label_mapping_dataframe.value, label="data"
	)
	features = convert_column_mapping_to_json(
	feature_mapping_dataframe.value, label="text"
	)
	column_mapping = json.dumps({labels, features}, indent=2)

	if check_column_mapping_keys_validity(column_mapping, ppl) is False:
	gr.Warning("Label mapping table has invalid contents. Please check again.")
	return (
	gr.update(interactive=False),
	gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
	gr.update(),
	gr.update(),
	gr.update(),
	gr.update(),
	gr.update(),
	)
	else:
	if model_id and dataset_id and dataset_config and dataset_split:
	return try_validate(
	model_id,
	ppl,
	dataset_id,
	dataset_config,
	dataset_split,
	column_mapping,
	)
	else:
	return (
	gr.update(interactive=False),
	gr.update(visible=True),
	gr.update(visible=False),
	gr.update(visible=False),
	gr.update(visible=False),
	gr.update(visible=False),
	gr.update(visible=False),
	)

	with gr.Row():
	gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
	with gr.Row():
	run_local = gr.Checkbox(value=True, label="Run in this Space")
	use_inference = read_inference_type("./config.yaml") == "hf_inference_api"
	run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")

	with gr.Row():
	selected = read_scanners("./config.yaml")
	scan_config = selected + ["data_leakage"]
	scanners = gr.CheckboxGroup(
	choices=scan_config, value=selected, label="Scan Settings", visible=True
	)

	with gr.Row():
	model_id_input = gr.Textbox(
	label="Hugging Face model id",
	placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
	)

	dataset_id_input = gr.Textbox(
	label="Hugging Face Dataset id",
	placeholder="tweet_eval",
	)
	with gr.Row():
	dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
	dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)

	with gr.Row(visible=True) as loading_row:
	gr.Markdown(
	"""
	<p style="text-align: center;">
	🚀🐢Please validate your model and dataset first...
	</p>
	"""
	)

	with gr.Row(visible=False) as preview_row:
	gr.Markdown(
	"""
	<h1 style="text-align: center;">
	Confirm Pre-processing Details
	</h1>
	Base on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
	"""
	)

	with gr.Row():
	id2label_mapping_dataframe = gr.DataFrame(
	label="Preview of label mapping", interactive=True, visible=False
	)
	feature_mapping_dataframe = gr.DataFrame(
	label="Preview of feature mapping", interactive=True, visible=False
	)
	with gr.Row():
	example_input = gr.Markdown("Sample Input: ", visible=False)

	with gr.Row():
	example_labels = gr.Label(label="Model Prediction Sample", visible=False)

	run_btn = gr.Button(
	"Get Evaluation Result",
	variant="primary",
	interactive=False,
	size="lg",
	)

	model_id_input.blur(
	clear_column_mapping_tables,
	outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
	)

	dataset_id_input.blur(
	check_dataset_and_get_config, dataset_id_input, dataset_config_input
	)
	dataset_id_input.submit(
	check_dataset_and_get_config, dataset_id_input, dataset_config_input
	)

	dataset_config_input.change(
	check_dataset_and_get_split,
	inputs=[dataset_config_input, dataset_id_input],
	outputs=[dataset_split_input],
	)

	dataset_id_input.blur(
	clear_column_mapping_tables,
	outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
	)
	# model_id_input.blur(gate_validate_btn,
	# inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
	# outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
	# dataset_id_input.blur(gate_validate_btn,
	# inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
	# outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
	dataset_config_input.change(
	gate_validate_btn,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	],
	outputs=[
	run_btn,
	loading_row,
	preview_row,
	example_input,
	example_labels,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	],
	)
	dataset_split_input.change(
	gate_validate_btn,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	],
	outputs=[
	run_btn,
	loading_row,
	preview_row,
	example_input,
	example_labels,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	],
	)
	id2label_mapping_dataframe.input(
	gate_validate_btn,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	],
	outputs=[
	run_btn,
	loading_row,
	preview_row,
	example_input,
	example_labels,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	],
	)
	feature_mapping_dataframe.input(
	gate_validate_btn,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	],
	outputs=[
	run_btn,
	loading_row,
	preview_row,
	example_input,
	example_labels,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	],
	)
	scanners.change(write_scanners, inputs=scanners)
	run_inference.change(write_inference_type, inputs=[run_inference])

	run_btn.click(
	try_submit,
	inputs=[
	model_id_input,
	dataset_id_input,
	dataset_config_input,
	dataset_split_input,
	id2label_mapping_dataframe,
	feature_mapping_dataframe,
	run_local,
	],
	outputs=[
	run_btn,
	],
	)