ZeroCommand committed
Commit 433de9b
1 Parent(s): d753141

Upload folder using huggingface_hub
.DS_Store ADDED
Binary file (6.15 kB).
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: Giskard Evaluator
+ emoji: 🐢🔍
+ colorFrom: blue
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 4.7.1
+ app_file: app.py
+ pinned: false
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,27 @@
+ import atexit
+
+ import gradio as gr
+
+ from app_debug import get_demo as get_demo_debug
+ from app_leaderboard import get_demo as get_demo_leaderboard
+ from app_text_classification import get_demo as get_demo_text_classification
+ from run_jobs import start_process_run_job, stop_thread
+
+ try:
+     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
+         with gr.Tab("Text Classification"):
+             get_demo_text_classification()
+         with gr.Tab("Leaderboard") as leaderboard_tab:
+             get_demo_leaderboard(leaderboard_tab)
+         with gr.Tab("Logs(Debug)"):
+             get_demo_debug()
+
+     start_process_run_job()
+
+     demo.queue(max_size=1000)
+     demo.launch(share=False)
+     atexit.register(stop_thread)
+
+ except Exception as e:
+     print("stop background thread: ", e)
+     stop_thread()
app_debug.py ADDED
@@ -0,0 +1,85 @@
+ from os import listdir
+ from os.path import isfile, join
+ import html
+
+ import gradio as gr
+
+ import pipe
+ from io_utils import get_logs_file
+
+ LOG_PATH = "./tmp"
+ CONFIG_PATH = "./cicd/configs/"
+ MAX_FILES_NUM = 20
+
+
+ def get_accordions_of_files(path, files):
+     components = [None for _ in range(0, MAX_FILES_NUM)]
+     for i in range(0, len(files)):
+         if i >= MAX_FILES_NUM:
+             break
+         with open(join(path, files[i]), "r") as f:
+             components[i] = f.read()
+     return components
+
+
+ def get_accordions_of_log_files():
+     log_files = [
+         f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")
+     ]
+     return get_accordions_of_files(LOG_PATH, log_files)
+
+
+ def get_accordions_of_config_files():
+     config_files = [
+         f
+         for f in listdir(CONFIG_PATH)
+         if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")
+     ]
+     return get_accordions_of_files(CONFIG_PATH, config_files)
+
+
+ def get_config_files():
+     config_files = [
+         join(CONFIG_PATH, f)
+         for f in listdir(CONFIG_PATH)
+         if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")
+     ]
+     return config_files
+
+
+ def get_log_files():
+     return [
+         join(LOG_PATH, f)
+         for f in listdir(LOG_PATH)
+         if isfile(join(LOG_PATH, f)) and f.endswith("log")
+     ]
+
+
+ def get_jobs_info_in_queue():
+     return [f"⌛️job id {html.escape(job[0])}: {html.escape(job[2])}<br/>" for job in pipe.jobs]
+
+ def get_queue_status():
+     if len(pipe.jobs) > 0 or pipe.current is not None:
+         current = pipe.current
+         if current is None:
+             current = "None"
+         return f'<div style="padding-top: 5%">Current job: {html.escape(current)} <br/> Jobs in queue: <br/> {"".join(get_jobs_info_in_queue())}</div>'
+     else:
+         return '<div style="padding-top: 5%">No jobs in queue, please submit an evaluation task from another tab.</div>'
+
+
+ def get_demo():
+     with gr.Row():
+         gr.HTML(
+             value=get_queue_status,
+             every=5,
+         )
+     with gr.Accordion(label="Log Files", open=False):
+         with gr.Row():
+             gr.Files(value=get_log_files, label="Log Files", every=10)
+         with gr.Row():
+             gr.Textbox(
+                 value=get_logs_file, every=0.5, lines=10, visible=True, label="Current Log File"
+             )
+     with gr.Accordion(label="Config Files", open=False):
+         gr.Files(value=get_config_files, label="Config Files", every=10)
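Note: the debug tab relies on Gradio's polling pattern, where a callable passed as `value` together with an `every` interval is re-invoked on a timer over the queue. A minimal, self-contained sketch of that pattern (an illustration, not part of this commit):

    import time
    import gradio as gr

    def now():
        # Re-invoked by Gradio every second to refresh the textbox
        return time.strftime("%H:%M:%S")

    with gr.Blocks() as demo:
        gr.Textbox(value=now, every=1, label="Server time")

    demo.queue().launch()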
app_env.py ADDED
@@ -0,0 +1,9 @@
+
+ HF_REPO_ID = "HF_REPO_ID"
+ HF_SPACE_ID = "SPACE_ID"
+ HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
+ HF_GSK_HUB_URL = "GSK_HUB_URL"
+ HF_GSK_HUB_PROJECT_KEY = "GSK_HUB_PROJECT_KEY"
+ HF_GSK_HUB_KEY = "GSK_API_KEY"
+ HF_GSK_HUB_HF_TOKEN = "GSK_HF_TOKEN"
+ HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"
app_leaderboard.py ADDED
@@ -0,0 +1,163 @@
+ import logging
+
+ import datasets
+ import gradio as gr
+ import pandas as pd
+ import datetime
+
+ from fetch_utils import (check_dataset_and_get_config,
+                          check_dataset_and_get_split)
+
+ import leaderboard
+ logger = logging.getLogger(__name__)
+ global update_time
+ update_time = datetime.datetime.fromtimestamp(0)
+
+ def get_records_from_dataset_repo(dataset_id):
+     dataset_config = check_dataset_and_get_config(dataset_id)
+
+     logger.info(f"Dataset {dataset_id} has configs {dataset_config}")
+     dataset_split = check_dataset_and_get_split(dataset_id, dataset_config[0])
+     logger.info(f"Dataset {dataset_id} has splits {dataset_split}")
+
+     try:
+         ds = datasets.load_dataset(dataset_id, dataset_config[0])[dataset_split[0]]
+         df = ds.to_pandas()
+         return df
+     except Exception as e:
+         logger.warning(
+             f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+         )
+         return pd.DataFrame()
+
+
+ def get_model_ids(ds):
+     logging.info(f"Dataset {ds} column names: {ds['model_id']}")
+     models = ds["model_id"].tolist()
+     # return unique elements in the list model_ids
+     model_ids = list(set(models))
+     model_ids.insert(0, "Any")
+     return model_ids
+
+
+ def get_dataset_ids(ds):
+     logging.info(f"Dataset {ds} column names: {ds['dataset_id']}")
+     dataset_list = ds["dataset_id"].tolist()  # renamed to avoid shadowing the `datasets` module
+     dataset_ids = list(set(dataset_list))
+     dataset_ids.insert(0, "Any")
+     return dataset_ids
+
+
+ def get_types(ds):
+     # set types for each column
+     types = [str(t) for t in ds.dtypes.to_list()]
+     types = [t.replace("object", "markdown") for t in types]
+     types = [t.replace("float64", "number") for t in types]
+     types = [t.replace("int64", "number") for t in types]
+     return types
+
+
+ def get_display_df(df):
+     # style all elements in the model_id column
+     display_df = df.copy()
+     columns = display_df.columns.tolist()
+     if "model_id" in columns:
+         display_df["model_id"] = display_df["model_id"].apply(
+             lambda x: f'<a href="https://huggingface.co/{x}" target="_blank" style="color:blue">🔗{x}</a>'
+         )
+     # style all elements in the dataset_id column
+     if "dataset_id" in columns:
+         display_df["dataset_id"] = display_df["dataset_id"].apply(
+             lambda x: f'<a href="https://huggingface.co/datasets/{x}" target="_blank" style="color:blue">🔗{x}</a>'
+         )
+     # style all elements in the report_link column
+     if "report_link" in columns:
+         display_df["report_link"] = display_df["report_link"].apply(
+             lambda x: f'<a href="{x}" target="_blank" style="color:blue">🔗{x}</a>'
+         )
+     return display_df
+
+ def get_demo(leaderboard_tab):
+     global update_time
+     update_time = datetime.datetime.now()
+     logger.info("Loading leaderboard records")
+     leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
+     records = leaderboard.records
+
+     model_ids = get_model_ids(records)
+     dataset_ids = get_dataset_ids(records)
+
+     column_names = records.columns.tolist()
+     default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
+     default_df = records[default_columns]  # extract the selected columns
+     types = get_types(default_df)
+     display_df = get_display_df(default_df)  # the styled dataframe to display
+
+     with gr.Row():
+         task_select = gr.Dropdown(
+             label="Task",
+             choices=["text_classification", "tabular"],
+             value="text_classification",
+             interactive=True,
+         )
+         model_select = gr.Dropdown(
+             label="Model id", choices=model_ids, value=model_ids[0], interactive=True
+         )
+         dataset_select = gr.Dropdown(
+             label="Dataset id",
+             choices=dataset_ids,
+             value=dataset_ids[0],
+             interactive=True,
+         )
+
+     with gr.Row():
+         columns_select = gr.CheckboxGroup(
+             label="Show columns",
+             choices=column_names,
+             value=default_columns,
+             interactive=True,
+         )
+
+     with gr.Row():
+         leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
+
+     def update_leaderboard_records(model_id, dataset_id, columns, task):
+         global update_time
+         if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
+             return gr.update()
+         update_time = datetime.datetime.now()
+         logger.info("Updating leaderboard records")
+         leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
+         return filter_table(model_id, dataset_id, columns, task)
+
+     leaderboard_tab.select(
+         fn=update_leaderboard_records,
+         inputs=[model_select, dataset_select, columns_select, task_select],
+         outputs=[leaderboard_df])
+
+     @gr.on(
+         triggers=[
+             model_select.change,
+             dataset_select.change,
+             columns_select.change,
+             task_select.change,
+         ],
+         inputs=[model_select, dataset_select, columns_select, task_select],
+         outputs=[leaderboard_df],
+     )
+     def filter_table(model_id, dataset_id, columns, task):
+         logger.info("Filtering leaderboard records")
+         records = leaderboard.records
+         # filter the table based on task
+         df = records[(records["task"] == task)]
+         # filter the table based on the model_id and dataset_id
+         if model_id and model_id != "Any":
+             df = df[(df["model_id"] == model_id)]
+         if dataset_id and dataset_id != "Any":
+             df = df[(df["dataset_id"] == dataset_id)]
+
+         # filter the table based on the columns
+         df = df[columns]
+         types = get_types(df)
+         display_df = get_display_df(df)
+         return gr.update(value=display_df, datatype=types, interactive=False)
app_legacy.py ADDED
@@ -0,0 +1,556 @@
+ import json
+ import logging
+ import os
+ import subprocess
+ import time
+
+ import datasets
+ import gradio as gr
+ import huggingface_hub
+ from transformers.pipelines import TextClassificationPipeline
+
+ from io_utils import (
+     convert_column_mapping_to_json,
+     read_inference_type,
+     read_scanners,
+     write_inference_type,
+     write_scanners,
+ )
+ from text_classification import (
+     check_column_mapping_keys_validity,
+     text_classification_fix_column_mapping,
+ )
+ from wordings import CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_MD
+
+ HF_REPO_ID = "HF_REPO_ID"
+ HF_SPACE_ID = "SPACE_ID"
+ HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
+
+
+ def check_model(model_id):
+     try:
+         task = huggingface_hub.model_info(model_id).pipeline_tag
+     except Exception:
+         return None, None
+
+     try:
+         from transformers import pipeline
+
+         ppl = pipeline(task=task, model=model_id)
+
+         return model_id, ppl
+     except Exception as e:
+         return model_id, e
+
+
+ def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
+     try:
+         configs = datasets.get_dataset_config_names(dataset_id)
+     except Exception:
+         # Dataset may not exist
+         return None, dataset_config, dataset_split
+
+     if dataset_config not in configs:
+         # Need to choose dataset subset (config)
+         return dataset_id, configs, dataset_split
+
+     ds = datasets.load_dataset(dataset_id, dataset_config)
+
+     if isinstance(ds, datasets.DatasetDict):
+         # Need to choose dataset split
+         if dataset_split not in ds.keys():
+             return dataset_id, None, list(ds.keys())
+     elif not isinstance(ds, datasets.Dataset):
+         # Unknown type
+         return dataset_id, None, None
+     return dataset_id, dataset_config, dataset_split
+
+
+ def try_validate(
+     m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping="{}"
+ ):
+     # Validate model
+     if m_id is None:
+         gr.Warning(
+             "Model is not accessible. Please set your HF_TOKEN if it is a private model."
+         )
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=True),  # Loading row
+             gr.update(visible=False),  # Preview row
+             gr.update(visible=False),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(visible=False),  # Label mapping preview
+             gr.update(visible=False),  # Feature mapping preview
+         )
+     if isinstance(ppl, Exception):
+         gr.Warning(f"Failed to load model: {ppl}")
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=True),  # Loading row
+             gr.update(visible=False),  # Preview row
+             gr.update(visible=False),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(visible=False),  # Label mapping preview
+             gr.update(visible=False),  # Feature mapping preview
+         )
+
+     # Validate dataset
+     d_id, config, split = check_dataset(
+         dataset_id=dataset_id,
+         dataset_config=dataset_config,
+         dataset_split=dataset_split,
+     )
+
+     dataset_ok = False
+     if d_id is None:
+         gr.Warning(
+             f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.'
+         )
+     elif isinstance(config, list):
+         gr.Warning(
+             f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.'
+         )
+         config = gr.update(choices=config, value=config[0])
+     elif isinstance(split, list):
+         gr.Warning(
+             f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.'
+         )
+         split = gr.update(choices=split, value=split[0])
+     else:
+         dataset_ok = True
+
+     if not dataset_ok:
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=True),  # Loading row
+             gr.update(visible=False),  # Preview row
+             gr.update(visible=False),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(visible=False),  # Label mapping preview
+             gr.update(visible=False),  # Feature mapping preview
+         )
+
+     # TODO: Validate column mapping by running once
+     prediction_result = None
+     id2label_df = None
+     if isinstance(ppl, TextClassificationPipeline):
+         try:
+             column_mapping = json.loads(column_mapping)
+         except Exception:
+             column_mapping = {}
+
+         (
+             column_mapping,
+             prediction_input,
+             prediction_result,
+             id2label_df,
+             feature_df,
+         ) = text_classification_fix_column_mapping(
+             column_mapping, ppl, d_id, config, split
+         )
+
+         column_mapping = json.dumps(column_mapping, indent=2)
+
+     if prediction_result is None and id2label_df is not None:
+         gr.Warning(
+             'The model failed to predict with the first row in the dataset. Please provide feature mappings in "Advanced" settings.'
+         )
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=False),  # Loading row
+             gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+             gr.update(
+                 value=f"**Sample Input**: {prediction_input}", visible=True
+             ),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(
+                 value=id2label_df, visible=True, interactive=True
+             ),  # Label mapping preview
+             gr.update(
+                 value=feature_df, visible=True, interactive=True
+             ),  # Feature mapping preview
+         )
+     elif id2label_df is None:
+         gr.Warning(
+             'The prediction result does not conform to the labels in the dataset. Please provide label mappings in "Advanced" settings.'
+         )
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=False),  # Loading row
+             gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+             gr.update(
+                 value=f"**Sample Input**: {prediction_input}", visible=True
+             ),  # Model prediction input
+             gr.update(
+                 value=prediction_result, visible=True
+             ),  # Model prediction preview
+             gr.update(visible=True, interactive=True),  # Label mapping preview
+             gr.update(visible=True, interactive=True),  # Feature mapping preview
+         )
+
+     gr.Info(
+         "Model and dataset validations passed. You can submit the evaluation task."
+     )
+
+     return (
+         gr.update(interactive=True),  # Submit button
+         gr.update(visible=False),  # Loading row
+         gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+         gr.update(
+             value=f"**Sample Input**: {prediction_input}", visible=True
+         ),  # Model prediction input
+         gr.update(value=prediction_result, visible=True),  # Model prediction preview
+         gr.update(
+             value=id2label_df, visible=True, interactive=True
+         ),  # Label mapping preview
+         gr.update(
+             value=feature_df, visible=True, interactive=True
+         ),  # Feature mapping preview
+     )
+
+
+ def try_submit(
+     m_id,
+     d_id,
+     config,
+     split,
+     id2label_mapping_dataframe,
+     feature_mapping_dataframe,
+     local,
+ ):
+     label_mapping = {}
+     for i, label in id2label_mapping_dataframe["Model Prediction Labels"].items():
+         label_mapping.update({str(i): label})
+
+     feature_mapping = {}
+     for i, feature in feature_mapping_dataframe["Dataset Features"].items():
+         feature_mapping.update(
+             {feature_mapping_dataframe["Model Input Features"][i]: feature}
+         )
+
+     # TODO: Set column mapping for some dataset such as `amazon_polarity`
+
+     if local:
+         command = [
+             "giskard_scanner",
+             "--loader",
+             "huggingface",
+             "--model",
+             m_id,
+             "--dataset",
+             d_id,
+             "--dataset_config",
+             config,
+             "--dataset_split",
+             split,
+             "--hf_token",
+             os.environ.get(HF_WRITE_TOKEN),
+             "--discussion_repo",
+             os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+             "--output_format",
+             "markdown",
+             "--output_portal",
+             "huggingface",
+             "--feature_mapping",
+             json.dumps(feature_mapping),
+             "--label_mapping",
+             json.dumps(label_mapping),
+             "--scan_config",
+             "../config.yaml",
+         ]
+
+         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+         start = time.time()
+         logging.info(f"Start local evaluation on {eval_str}")
+
+         evaluator = subprocess.Popen(
+             command,
+             stderr=subprocess.STDOUT,
+         )
+         result = evaluator.wait()
+
+         logging.info(
+             f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s"
+         )
+
+         gr.Info(
+             f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s"
+         )
+     else:
+         gr.Info("TODO: Submit task to an endpoint")
+
+     return gr.update(interactive=True)  # Submit button
+
+
+ def get_demo():
+     # gr.themes.Soft(
+     #     primary_hue="green",
+     # )
+
+     def check_dataset_and_get_config(dataset_id):
+         try:
+             configs = datasets.get_dataset_config_names(dataset_id)
+             return gr.Dropdown(configs, value=configs[0], visible=True)
+         except Exception:
+             # Dataset may not exist
+             pass
+
+     def check_dataset_and_get_split(dataset_config, dataset_id):
+         try:
+             splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
+             return gr.Dropdown(splits, value=splits[0], visible=True)
+         except Exception as e:
+             # Dataset may not exist
+             gr.Warning(
+                 f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+             )
+
+     def clear_column_mapping_tables():
+         return [
+             gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
+             gr.update(value=[], visible=False, interactive=True),
+             gr.update(value=[], visible=False, interactive=True),
+         ]
+
+     def gate_validate_btn(
+         model_id,
+         dataset_id,
+         dataset_config,
+         dataset_split,
+         id2label_mapping_dataframe=None,
+         feature_mapping_dataframe=None,
+     ):
+         column_mapping = "{}"
+         _, ppl = check_model(model_id=model_id)
+
+         if id2label_mapping_dataframe is not None:
+             labels = convert_column_mapping_to_json(
+                 id2label_mapping_dataframe.value, label="data"
+             )
+             features = convert_column_mapping_to_json(
+                 feature_mapping_dataframe.value, label="text"
+             )
+             column_mapping = json.dumps({**labels, **features}, indent=2)
+
+         if check_column_mapping_keys_validity(column_mapping, ppl) is False:
+             gr.Warning("Label mapping table has invalid contents. Please check again.")
+             return (
+                 gr.update(interactive=False),
+                 gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
+                 gr.update(),
+                 gr.update(),
+                 gr.update(),
+                 gr.update(),
+                 gr.update(),
+             )
+         else:
+             if model_id and dataset_id and dataset_config and dataset_split:
+                 return try_validate(
+                     model_id,
+                     ppl,
+                     dataset_id,
+                     dataset_config,
+                     dataset_split,
+                     column_mapping,
+                 )
+             else:
+                 return (
+                     gr.update(interactive=False),
+                     gr.update(visible=True),
+                     gr.update(visible=False),
+                     gr.update(visible=False),
+                     gr.update(visible=False),
+                     gr.update(visible=False),
+                     gr.update(visible=False),
+                 )
+
+     with gr.Row():
+         gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
+     with gr.Row():
+         run_local = gr.Checkbox(value=True, label="Run in this Space")
+         use_inference = read_inference_type("./config.yaml") == "hf_inference_api"
+         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
+
+     with gr.Row():
+         selected = read_scanners("./config.yaml")
+         scan_config = selected + ["data_leakage"]
+         scanners = gr.CheckboxGroup(
+             choices=scan_config, value=selected, label="Scan Settings", visible=True
+         )
+
+     with gr.Row():
+         model_id_input = gr.Textbox(
+             label="Hugging Face model id",
+             placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
+         )
+
+         dataset_id_input = gr.Textbox(
+             label="Hugging Face Dataset id",
+             placeholder="tweet_eval",
+         )
+     with gr.Row():
+         dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False)
+         dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False)
+
+     with gr.Row(visible=True) as loading_row:
+         gr.Markdown(
+             """
+             <p style="text-align: center;">
+                 🚀🐢Please validate your model and dataset first...
+             </p>
+             """
+         )
+
+     with gr.Row(visible=False) as preview_row:
+         gr.Markdown(
+             """
+             <h1 style="text-align: center;">
+                 Confirm Pre-processing Details
+             </h1>
+             Based on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
+             """
+         )
+
+     with gr.Row():
+         id2label_mapping_dataframe = gr.DataFrame(
+             label="Preview of label mapping", interactive=True, visible=False
+         )
+         feature_mapping_dataframe = gr.DataFrame(
+             label="Preview of feature mapping", interactive=True, visible=False
+         )
+     with gr.Row():
+         example_input = gr.Markdown("Sample Input: ", visible=False)
+
+     with gr.Row():
+         example_labels = gr.Label(label="Model Prediction Sample", visible=False)
+
+     run_btn = gr.Button(
+         "Get Evaluation Result",
+         variant="primary",
+         interactive=False,
+         size="lg",
+     )
+
+     model_id_input.blur(
+         clear_column_mapping_tables,
+         outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
+     )
+
+     dataset_id_input.blur(
+         check_dataset_and_get_config, dataset_id_input, dataset_config_input
+     )
+     dataset_id_input.submit(
+         check_dataset_and_get_config, dataset_id_input, dataset_config_input
+     )
+
+     dataset_config_input.change(
+         check_dataset_and_get_split,
+         inputs=[dataset_config_input, dataset_id_input],
+         outputs=[dataset_split_input],
+     )
+
+     dataset_id_input.blur(
+         clear_column_mapping_tables,
+         outputs=[id2label_mapping_dataframe, feature_mapping_dataframe],
+     )
+     # model_id_input.blur(gate_validate_btn,
+     #                     inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+     #                     outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     # dataset_id_input.blur(gate_validate_btn,
+     #                       inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+     #                       outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     dataset_config_input.change(
+         gate_validate_btn,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+         ],
+         outputs=[
+             run_btn,
+             loading_row,
+             preview_row,
+             example_input,
+             example_labels,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+         ],
+     )
+     dataset_split_input.change(
+         gate_validate_btn,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+         ],
+         outputs=[
+             run_btn,
+             loading_row,
+             preview_row,
+             example_input,
+             example_labels,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+         ],
+     )
+     id2label_mapping_dataframe.input(
+         gate_validate_btn,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+         ],
+         outputs=[
+             run_btn,
+             loading_row,
+             preview_row,
+             example_input,
+             example_labels,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+         ],
+     )
+     feature_mapping_dataframe.input(
+         gate_validate_btn,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+         ],
+         outputs=[
+             run_btn,
+             loading_row,
+             preview_row,
+             example_input,
+             example_labels,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+         ],
+     )
+     scanners.change(write_scanners, inputs=scanners)
+     run_inference.change(write_inference_type, inputs=[run_inference])
+
+     run_btn.click(
+         try_submit,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+             run_local,
+         ],
+         outputs=[
+             run_btn,
+         ],
+     )
app_text_classification.py ADDED
@@ -0,0 +1,258 @@
+ import uuid
+
+ import gradio as gr
+
+ from io_utils import get_logs_file, read_scanners, write_scanners
+ from text_classification_ui_helpers import (
+     get_related_datasets_from_leaderboard,
+     align_columns_and_show_prediction,
+     check_dataset,
+     precheck_model_ds_enable_example_btn,
+     try_submit,
+     write_column_mapping_to_config,
+ )
+ from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD, USE_INFERENCE_API_TIP
+
+ MAX_LABELS = 40
+ MAX_FEATURES = 20
+
+ EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
+ CONFIG_PATH = "./config.yaml"
+
+
+ def get_demo():
+     with gr.Row():
+         gr.Markdown(INTRODUCTION_MD)
+     uid_label = gr.Textbox(
+         label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
+     )
+     with gr.Row():
+         model_id_input = gr.Textbox(
+             label="Hugging Face model id",
+             placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
+         )
+
+         with gr.Column():
+             dataset_id_input = gr.Dropdown(
+                 choices=[],
+                 value="",
+                 allow_custom_value=True,
+                 label="Hugging Face Dataset id",
+             )
+
+     with gr.Row():
+         dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False, allow_custom_value=True)
+         dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False, allow_custom_value=True)
+
+     with gr.Row():
+         first_line_ds = gr.DataFrame(label="Dataset preview", visible=False)
+     with gr.Row():
+         loading_status = gr.HTML(visible=True)
+     with gr.Row():
+         example_btn = gr.Button(
+             "Validate model & dataset",
+             visible=True,
+             variant="primary",
+             interactive=False,
+         )
+
+     with gr.Row():
+         example_input = gr.HTML(visible=False)
+     with gr.Row():
+         example_prediction = gr.Label(label="Model Prediction Sample", visible=False)
+
+     with gr.Row():
+         with gr.Accordion(
+             label="Label and Feature Mapping", visible=False, open=False
+         ) as column_mapping_accordion:
+             with gr.Row():
+                 gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
+             column_mappings = []
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("# Label Mapping")
+                     for _ in range(MAX_LABELS):
+                         column_mappings.append(gr.Dropdown(visible=False))
+                 with gr.Column():
+                     gr.Markdown("# Feature Mapping")
+                     for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
+                         column_mappings.append(gr.Dropdown(visible=False))
+
+     with gr.Accordion(label="Model Wrap Advanced Config", open=True):
+         gr.HTML(USE_INFERENCE_API_TIP)
+
+         run_inference = gr.Checkbox(value=True, label="Run with Inference API")
+         inference_token = gr.Textbox(
+             placeholder="hf-xxxxxxxxxxxxxxxxxxxx",
+             value="",
+             label="HF Token for Inference API",
+             visible=True,
+             interactive=True,
+         )
+
+     with gr.Accordion(label="Scanner Advanced Config (optional)", open=False):
+         scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
+
+         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
+         def get_scanners(uid):
+             selected = read_scanners(uid)
+             # currently we remove data_leakage from the default scanners
+             # Reason: data_leakage barely raises any issues and takes too many requests
+             # when using inference API, causing rate limit error
+             scan_config = selected + ["data_leakage"]
+             return gr.update(
+                 choices=scan_config, value=selected, label="Scan Settings", visible=True
+             )
+
+     with gr.Row():
+         run_btn = gr.Button(
+             "Get Evaluation Result",
+             variant="primary",
+             interactive=False,
+             size="lg",
+         )
+
+     with gr.Row():
+         logs = gr.Textbox(
+             value=get_logs_file,
+             label="Giskard Bot Evaluation Log:",
+             visible=False,
+             every=0.5,
+         )
+
+     scanners.change(write_scanners, inputs=[scanners, uid_label])
+
+     gr.on(
+         triggers=[model_id_input.change],
+         fn=get_related_datasets_from_leaderboard,
+         inputs=[model_id_input],
+         outputs=[dataset_id_input],
+     ).then(
+         fn=check_dataset,
+         inputs=[dataset_id_input],
+         outputs=[dataset_config_input, dataset_split_input, loading_status],
+     )
+
+     gr.on(
+         triggers=[dataset_id_input.input],
+         fn=check_dataset,
+         inputs=[dataset_id_input],
+         outputs=[dataset_config_input, dataset_split_input, loading_status],
+     )
+
+     gr.on(
+         triggers=[label.change for label in column_mappings],
+         fn=write_column_mapping_to_config,
+         inputs=[
+             uid_label,
+             *column_mappings,
+         ],
+     )
+
+     # label.change sometimes does not pass the changed value
+     gr.on(
+         triggers=[label.input for label in column_mappings],
+         fn=write_column_mapping_to_config,
+         inputs=[
+             uid_label,
+             *column_mappings,
+         ],
+     )
+
+     gr.on(
+         triggers=[
+             model_id_input.change,
+             dataset_id_input.change,
+             dataset_config_input.change,
+             dataset_split_input.change,
+         ],
+         fn=precheck_model_ds_enable_example_btn,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+         ],
+         outputs=[example_btn, first_line_ds, loading_status],
+     )
+
+     gr.on(
+         triggers=[
+             example_btn.click,
+         ],
+         fn=align_columns_and_show_prediction,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+             uid_label,
+             run_inference,
+             inference_token,
+         ],
+         outputs=[
+             example_input,
+             example_prediction,
+             column_mapping_accordion,
+             run_btn,
+             loading_status,
+             *column_mappings,
+         ],
+     )
+
+     gr.on(
+         triggers=[
+             run_btn.click,
+         ],
+         fn=try_submit,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+             run_inference,
+             inference_token,
+             uid_label,
+         ],
+         outputs=[run_btn, logs, uid_label],
+     )
+
+     def enable_run_btn(run_inference, inference_token, model_id, dataset_id, dataset_config, dataset_split):
+         if not run_inference or inference_token == "":
+             return gr.update(interactive=False)
+         if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
+             return gr.update(interactive=False)
+         return gr.update(interactive=True)
+
+     gr.on(
+         triggers=[
+             run_inference.input,
+             inference_token.input,
+             scanners.input,
+         ],
+         fn=enable_run_btn,
+         inputs=[
+             run_inference,
+             inference_token,
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+         ],
+         outputs=[run_btn],
+     )
+
+     gr.on(
+         triggers=[label.input for label in column_mappings],
+         fn=enable_run_btn,
+         inputs=[
+             run_inference,
+             inference_token,
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+         ],  # FIXME
+         outputs=[run_btn],
+     )
cicd/.gitkeep ADDED
File without changes
cicd/configs/.gitkeep ADDED
File without changes
config.yaml ADDED
@@ -0,0 +1,13 @@
+ configuration:
+   ethical_bias:
+     threshold: 0.05
+   performance:
+     alpha: 0.05
+ detectors:
+   - ethical_bias
+   - text_perturbation
+   - robustness
+   - performance
+   - underconfidence
+   - overconfidence
+   - spurious_correlation
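Note: `detectors` lists the scanners handed to the scan CLI via `--scan_config`, while `configuration` carries per-detector settings such as thresholds. A minimal sketch of reading this file with PyYAML, mirroring what io_utils.py does below (an illustration, not part of this commit):

    import yaml

    with open("config.yaml", "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    detectors = config.get("detectors", [])  # e.g. ["ethical_bias", "text_perturbation", ...]
    threshold = config["configuration"]["ethical_bias"]["threshold"]  # 0.05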
fetch_utils.py ADDED
@@ -0,0 +1,32 @@
+ import logging
+
+ import datasets
+
+
+ def check_dataset_and_get_config(dataset_id):
+     try:
+         configs = datasets.get_dataset_config_names(dataset_id)
+         return configs
+     except Exception:
+         # Dataset may not exist
+         return None
+
+
+ def check_dataset_and_get_split(dataset_id, dataset_config):
+     try:
+         ds = datasets.load_dataset(dataset_id, dataset_config)
+     except Exception as e:
+         # Dataset may not exist
+         logging.warning(
+             f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
+         )
+         return None
+     try:
+         splits = list(ds.keys())
+         return splits
+     except Exception as e:
+         # Dataset has no splits
+         logging.warning(
+             f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}"
+         )
+         return None
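Note: both helpers return None on failure instead of raising, so callers branch on the result. A minimal usage sketch (assumed usage, not part of this commit):

    from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split

    configs = check_dataset_and_get_config("tweet_eval")
    if configs:
        splits = check_dataset_and_get_split("tweet_eval", configs[0])
        print(configs[0], splits)  # e.g. a config name and ["train", "test", "validation"]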
index.html ADDED
@@ -0,0 +1,19 @@
+ <!doctype html>
+ <html>
+   <head>
+     <meta charset="utf-8" />
+     <meta name="viewport" content="width=device-width" />
+     <title>My static Space</title>
+     <link rel="stylesheet" href="style.css" />
+   </head>
+   <body>
+     <div class="card">
+       <h1>Welcome to your static Space!</h1>
+       <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
+       <p>
+         Also don't forget to check the
+         <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
+       </p>
+     </div>
+   </body>
+ </html>
io_utils.py ADDED
@@ -0,0 +1,122 @@
+ import os
+
+ import yaml
+
+ YAML_PATH = "./cicd/configs"
+ LOG_FILE = "temp_log"
+
+
+ class Dumper(yaml.Dumper):
+     def increase_indent(self, flow=False, *args, **kwargs):
+         return super().increase_indent(flow=flow, indentless=False)
+
+
+ def get_yaml_path(uid):
+     if not os.path.exists(YAML_PATH):
+         os.makedirs(YAML_PATH)
+     if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
+         os.system(f"cp config.yaml {YAML_PATH}/{uid}_config.yaml")
+     return f"{YAML_PATH}/{uid}_config.yaml"
+
+
+ # read scanners from yaml file
+ # return a list of scanners
+ def read_scanners(uid):
+     scanners = []
+     with open(get_yaml_path(uid), "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         scanners = config.get("detectors", [])
+     return scanners
+
+
+ # convert a list of scanners to yaml file
+ def write_scanners(scanners, uid):
+     with open(get_yaml_path(uid), "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         if config:
+             config["detectors"] = scanners
+     # save scanners to detectors in yaml
+     with open(get_yaml_path(uid), "w") as f:
+         yaml.dump(config, f, Dumper=Dumper)
+
+
+ # read model_type from yaml file
+ def read_inference_type(uid):
+     inference_type = ""
+     with open(get_yaml_path(uid), "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         inference_type = config.get("inference_type", "")
+     return inference_type
+
+
+ # write model_type to yaml file
+ def write_inference_type(use_inference, inference_token, uid):
+     with open(get_yaml_path(uid), "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         if use_inference:
+             config["inference_type"] = "hf_inference_api"
+             config["inference_token"] = inference_token
+         else:
+             config["inference_type"] = "hf_pipeline"
+             # FIXME: A quick and temp fix for missing token
+             config["inference_token"] = ""
+     # save inference_type to inference_type in yaml
+     with open(get_yaml_path(uid), "w") as f:
+         yaml.dump(config, f, Dumper=Dumper)
+
+
+ # read column mapping from yaml file
+ def read_column_mapping(uid):
+     column_mapping = {}
+     with open(get_yaml_path(uid), "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         if config:
+             column_mapping = config.get("column_mapping", dict())
+     return column_mapping
+
+
+ # write column mapping to yaml file
+ def write_column_mapping(mapping, uid):
+     with open(get_yaml_path(uid), "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+
+     if config is None:
+         return
+     if mapping is None and "column_mapping" in config.keys():
+         del config["column_mapping"]
+     else:
+         config["column_mapping"] = mapping
+     with open(get_yaml_path(uid), "w") as f:
+         # yaml Dumper will by default sort the keys
+         yaml.dump(config, f, Dumper=Dumper, sort_keys=False)
+
+
+ # convert column mapping dataframe to json
+ def convert_column_mapping_to_json(df, label=""):
+     column_mapping = {}
+     column_mapping[label] = []
+     for _, row in df.iterrows():
+         column_mapping[label].append(row.tolist())
+     return column_mapping
+
+
+ def get_log_file_with_uid(uid):
+     try:
+         print(f"Loading {uid}.log")
+         # open for reading; mode "a" in the original would not allow read()
+         with open(f"./tmp/{uid}.log", "r") as file:
+             return file.read()
+     except Exception:
+         return "Log file does not exist"
+
+
+ def get_logs_file():
+     try:
+         with open(LOG_FILE, "r") as file:
+             return file.read()
+     except Exception:
+         return "Log file does not exist"
+
+
+ def write_log_to_user_file(task_id, log):
+     with open(f"./tmp/{task_id}.log", "a") as f:
+         f.write(log)
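Note: every evaluation gets its own config copy keyed by uid (`./cicd/configs/{uid}_config.yaml`), seeded from the top-level config.yaml on first access by get_yaml_path. A minimal round-trip sketch (assumed usage, not part of this commit):

    import uuid

    from io_utils import read_scanners, write_scanners

    uid = str(uuid.uuid4())
    scanners = read_scanners(uid)  # first call copies config.yaml to ./cicd/configs/{uid}_config.yaml
    write_scanners([s for s in scanners if s != "performance"], uid)  # persist a narrowed selection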
isolated_env.py ADDED
@@ -0,0 +1,34 @@
+ import os
+ import subprocess
+
+ from io_utils import write_log_to_user_file
+
+
+ def prepare_venv(execution_id, deps):
+     python_executable = "python"
+     venv_base = f"tmp/venvs/{execution_id}"
+
+     pip_executable = os.path.join(venv_base, "bin", "pip")
+     # Check pyver
+     write_log_to_user_file(execution_id, "Checking Python version\n")
+     p = subprocess.run([python_executable, "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+     write_log_to_user_file(execution_id, p.stdout.decode())
+     if p.returncode != 0:
+         raise RuntimeError(f"{p.args} ended with {p.returncode}")
+     # Create venv
+     write_log_to_user_file(execution_id, "Creating virtual environment\n")
+     p = subprocess.run([python_executable, "-m", "venv", venv_base, "--clear"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+     write_log_to_user_file(execution_id, p.stdout.decode())
+     if p.returncode != 0:
+         raise RuntimeError(f"{p.args} ended with {p.returncode}")
+     # Output requirements.txt
+     requirement_file = os.path.join(venv_base, "requirements.txt")
+     with open(requirement_file, "w") as f:
+         f.writelines(deps)
+     # Install deps
+     write_log_to_user_file(execution_id, "Installing dependencies\n")
+     p = subprocess.run([pip_executable, "install", "-r", requirement_file], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+     write_log_to_user_file(execution_id, p.stdout.decode())
+     if p.returncode != 0:
+         raise RuntimeError(f"{p.args} ended with {p.returncode}")
+     return os.path.join(venv_base, "bin", "giskard_scanner")
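Note: prepare_venv builds a throwaway virtualenv per execution under tmp/venvs/{execution_id}, installs the given dependencies into it, and returns the path to that environment's giskard_scanner binary, so each scan runs isolated from the Space's own environment. A minimal call sketch (assumed argument values, not part of this commit):

    from isolated_env import prepare_venv

    with open("requirements.txt", "r") as f:
        deps = f.read()

    # Returns e.g. "tmp/venvs/demo-run-1/bin/giskard_scanner"
    executable = prepare_venv("demo-run-1", deps)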
leaderboard.py ADDED
@@ -0,0 +1,5 @@
+ import pandas as pd
+
+ records = pd.DataFrame()
+
+ LEADERBOARD = "giskard-bot/evaluator-leaderboard"
mlflow_test.py ADDED
@@ -0,0 +1,20 @@
+ from pathlib import Path
+ from mlflow.utils.environment import _PythonEnv
+ from mlflow.utils.virtualenv import (
+     _PYENV_ROOT_DIR,
+     _VIRTUALENV_ENVS_DIR,
+     _create_virtualenv,
+     _get_mlflow_virtualenv_root,
+     _get_virtualenv_extra_env_vars,
+     _get_virtualenv_name,
+     _install_python,
+ )
+
+
+ _create_virtualenv(
+     "/Users/inoki/giskard-home/projects/credit/models/2a2b6a9c-4050-4bb6-9024-00bf15651262",
+     Path("/opt/homebrew/bin/python3.10"),
+     Path("/Users/inoki/giskard-home/mlflow-venv1"),
+     _PythonEnv()
+ )
+
output/.gitkeep ADDED
File without changes
pipe.py ADDED
@@ -0,0 +1,3 @@
+
+ jobs = list()
+ current = None
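Note: pipe.py is deliberately bare; the module object itself serves as process-wide shared state. The UI handlers append to `pipe.jobs`, and the background worker in run_jobs.py pops entries and publishes the active job via `pipe.current`. A minimal sketch of the pattern (an illustration, not part of this commit):

    import pipe

    # Producer side (e.g. a Gradio handler)
    pipe.jobs.append(("task-1", ("arg1", "arg2"), "evaluate model X on dataset Y"))

    # Consumer side (e.g. a worker loop)
    if pipe.jobs:
        task_id, args, description = pipe.jobs.pop()
        pipe.current = description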
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ giskard
+ huggingface_hub
+ hf-transfer
+ torch==2.0.1
+ transformers
+ datasets
+ -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
run_jobs.py ADDED
@@ -0,0 +1,186 @@
+ import json
+ import logging
+ import os
+ import subprocess
+ import threading
+ import time
+ from pathlib import Path
+
+ import pipe
+ from app_env import (
+     HF_GSK_HUB_HF_TOKEN,
+     HF_GSK_HUB_KEY,
+     HF_GSK_HUB_PROJECT_KEY,
+     HF_GSK_HUB_UNLOCK_TOKEN,
+     HF_GSK_HUB_URL,
+     HF_REPO_ID,
+     HF_SPACE_ID,
+     HF_WRITE_TOKEN,
+ )
+ from io_utils import LOG_FILE, get_yaml_path, write_log_to_user_file
+ from isolated_env import prepare_venv
+ from leaderboard import LEADERBOARD
+
+ is_running = False
+
+ logger = logging.getLogger(__file__)
+
+
+ def start_process_run_job():
+     try:
+         logging.debug("Running jobs in thread")
+         global thread, is_running
+         thread = threading.Thread(target=run_job)
+         thread.daemon = True
+         is_running = True
+         thread.start()
+
+     except Exception as e:
+         print("Failed to start thread: ", e)
+
+
+ def stop_thread():
+     logging.debug("Stop thread")
+     global is_running
+     is_running = False
+
+
+ def prepare_env_and_get_command(
+     m_id,
+     d_id,
+     config,
+     split,
+     inference,
+     inference_token,
+     uid,
+     label_mapping,
+     feature_mapping,
+ ):
+     leaderboard_dataset = None
+     if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
+         leaderboard_dataset = LEADERBOARD
+
+     inference_type = "hf_pipeline"
+     if inference and inference_token:
+         inference_type = "hf_inference_api"
+
+     executable = "giskard_scanner"
+     try:
+         # Copy the current requirements (might be changed)
+         with open("requirements.txt", "r") as f:
+             executable = prepare_venv(
+                 uid,
+                 "\n".join(f.readlines()),
+             )
+         logger.info(f"Using {executable} as executable")
+     except Exception as e:
+         logger.warning(f"Failed to create venv due to {e}; using the current env as a fallback.")
+         executable = "giskard_scanner"
+
+     command = [
+         executable,
+         "--loader",
+         "huggingface",
+         "--model",
+         m_id,
+         "--dataset",
+         d_id,
+         "--dataset_config",
+         config,
+         "--dataset_split",
+         split,
+         "--output_format",
+         "markdown",
+         "--output_portal",
+         "huggingface",
+         "--feature_mapping",
+         json.dumps(feature_mapping),
+         "--label_mapping",
+         json.dumps(label_mapping),
+         "--scan_config",
+         get_yaml_path(uid),
+         "--inference_type",
+         inference_type,
+         "--inference_api_token",
+         inference_token,
+     ]
+     # The token to publish post
+     if os.environ.get(HF_WRITE_TOKEN):
+         command.append("--hf_token")
+         command.append(os.environ.get(HF_WRITE_TOKEN))
+
+     # The repo to publish post
+     if os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID):
+         command.append("--discussion_repo")
+         # TODO: Replace by the model id
+         command.append(os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID))
+
+     # The repo to publish for ranking
+     if leaderboard_dataset:
+         command.append("--leaderboard_dataset")
+         command.append(leaderboard_dataset)
+
+     # The info to upload to Giskard hub
+     if os.environ.get(HF_GSK_HUB_KEY):
+         command.append("--giskard_hub_api_key")
+         command.append(os.environ.get(HF_GSK_HUB_KEY))
+     if os.environ.get(HF_GSK_HUB_URL):
+         command.append("--giskard_hub_url")
+         command.append(os.environ.get(HF_GSK_HUB_URL))
+     if os.environ.get(HF_GSK_HUB_PROJECT_KEY):
+         command.append("--giskard_hub_project_key")
+         command.append(os.environ.get(HF_GSK_HUB_PROJECT_KEY))
+     if os.environ.get(HF_GSK_HUB_HF_TOKEN):
+         command.append("--giskard_hub_hf_token")
+         command.append(os.environ.get(HF_GSK_HUB_HF_TOKEN))
+     if os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN):
+         command.append("--giskard_hub_unlock_token")
+         command.append(os.environ.get(HF_GSK_HUB_UNLOCK_TOKEN))
+
+     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+
+     write_log_to_user_file(
+         uid,
+         f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
+     )
+
+     return command
+
+
+ def save_job_to_pipe(task_id, job, description, lock):
+     with lock:
+         pipe.jobs.append((task_id, job, description))
+
+
+ def pop_job_from_pipe():
+     if len(pipe.jobs) == 0:
+         return
+     job_info = pipe.jobs.pop()
+     pipe.current = job_info[2]
+     task_id = job_info[0]
+
+     # Link to LOG_FILE
+     log_file_path = Path(LOG_FILE)
+     if log_file_path.exists():
+         log_file_path.unlink()
+     os.symlink(f"./tmp/{task_id}.log", LOG_FILE)
+
+     write_log_to_user_file(task_id, f"Running job id {task_id}\n")
+     command = prepare_env_and_get_command(*job_info[1])
+
+     with open(f"./tmp/{task_id}.log", "a") as log_file:
+         p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
+         p.wait()
+     pipe.current = None
+
+
+ def run_job():
+     global is_running
+     while is_running:
+         try:
+             pop_job_from_pipe()
+             time.sleep(10)
+         except KeyboardInterrupt:
+             logging.debug("KeyboardInterrupt stop background thread")
+             is_running = False
+             break
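Note: jobs enter the queue through save_job_to_pipe under a lock, and the daemon thread started by start_process_run_job drains one job roughly every 10 seconds. A minimal enqueue sketch; the argument values are assumed placeholders, and the tuple order follows prepare_env_and_get_command's signature above (not part of this commit):

    import threading
    import uuid

    from run_jobs import save_job_to_pipe, start_process_run_job

    lock = threading.Lock()
    uid = str(uuid.uuid4())
    job_args = ("model-id", "dataset-id", "default", "test",
                True, "hf-xxxx", uid, {}, {})  # (m_id, d_id, config, split, inference, token, uid, label_mapping, feature_mapping)
    save_job_to_pipe(uid, job_args, f"job {uid}", lock)
    start_process_run_job()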
scan_config.yaml ADDED
@@ -0,0 +1,8 @@
+ detectors:
+   - ethical_bias
+   - text_perturbation
+   - robustness
+   - performance
+   - underconfidence
+   - overconfidence
+   - spurious_correlation
style.css ADDED
@@ -0,0 +1,28 @@
+ body {
+   padding: 2rem;
+   font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
+ }
+
+ h1 {
+   font-size: 16px;
+   margin-top: 0;
+ }
+
+ p {
+   color: rgb(107, 114, 128);
+   font-size: 15px;
+   margin-bottom: 10px;
+   margin-top: 5px;
+ }
+
+ .card {
+   max-width: 620px;
+   margin: 0 auto;
+   padding: 16px;
+   border: 1px solid lightgray;
+   border-radius: 16px;
+ }
+
+ .card p:last-child {
+   margin-bottom: 0;
+ }
text_classification.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+
4
+ import datasets
5
+ import huggingface_hub
6
+ import pandas as pd
7
+ from transformers import pipeline
8
+ import requests
9
+ import os
10
+
11
+ logger = logging.getLogger(__name__)
12
+ HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
13
+
14
+ logger = logging.getLogger(__file__)
15
+
16
+ class HuggingFaceInferenceAPIResponse:
17
+ def __init__(self, message):
18
+ self.message = message
19
+
20
+
21
+ def get_labels_and_features_from_dataset(ds):
22
+ try:
23
+ dataset_features = ds.features
24
+ label_keys = [i for i in dataset_features.keys() if i.startswith('label')]
25
+ if len(label_keys) == 0: # no labels found
26
+ # return everything for post processing
27
+ return list(dataset_features.keys()), list(dataset_features.keys())
28
+ if not isinstance(dataset_features[label_keys[0]], datasets.ClassLabel):
29
+ if hasattr(dataset_features[label_keys[0]], 'feature'):
30
+ label_feat = dataset_features[label_keys[0]].feature
31
+ labels = label_feat.names
32
+ else:
33
+ labels = dataset_features[label_keys[0]].names
34
+ features = [f for f in dataset_features.keys() if not f.startswith("label")]
35
+ return labels, features
36
+ except Exception as e:
37
+ logging.warning(
38
+ f"Get Labels/Features Failed for dataset: {e}"
39
+ )
40
+ return None, None
41
+
42
+ def check_model_task(model_id):
43
+ # check if model is valid on huggingface
44
+ try:
45
+ task = huggingface_hub.model_info(model_id).pipeline_tag
46
+ if task is None:
47
+ return None
48
+ return task
49
+ except Exception:
50
+ return None
51
+
52
+ def get_model_labels(model_id, example_input):
53
+ hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
54
+ payload = {"inputs": example_input, "options": {"use_cache": True}}
55
+ response = hf_inference_api(model_id, hf_token, payload)
56
+ if "error" in response:
57
+ return None
58
+ return extract_from_response(response, "label")
59
+
60
+ def extract_from_response(data, key):
61
+ results = []
62
+
63
+ if isinstance(data, dict):
64
+ res = data.get(key)
65
+ if res is not None:
66
+ results.append(res)
67
+
68
+ for value in data.values():
69
+ results.extend(extract_from_response(value, key))
70
+
71
+ elif isinstance(data, list):
72
+ for element in data:
73
+ results.extend(extract_from_response(element, key))
74
+
75
+ return results
76
+
77
+ def hf_inference_api(model_id, hf_token, payload):
78
+ hf_inference_api_endpoint = os.environ.get(
79
+ "HF_INFERENCE_ENDPOINT", default="https://api-inference.huggingface.co"
80
+ )
81
+ url = f"{hf_inference_api_endpoint}/models/{model_id}"
82
+ headers = {"Authorization": f"Bearer {hf_token}"}
83
+ response = requests.post(url, headers=headers, json=payload)
84
+ if not hasattr(response, "status_code") or response.status_code != 200:
85
+ logger.warning(f"Request to inference API returns {response}")
86
+ try:
87
+ return response.json()
88
+ except Exception:
89
+ return {"error": response.content}
90
+
91
+ def preload_hf_inference_api(model_id):
92
+ payload = {"inputs": "This is a test", "options": {"use_cache": True, }}
93
+ hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
94
+ hf_inference_api(model_id, hf_token, payload)
95
+
96
+ def check_model_pipeline(model_id):
97
+ try:
98
+ task = huggingface_hub.model_info(model_id).pipeline_tag
99
+ except Exception:
100
+ return None
101
+
102
+ try:
103
+ ppl = pipeline(task=task, model=model_id)
104
+
105
+ return ppl
106
+ except Exception:
107
+ return None
108
+
109
+
110
+ def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
111
+ for model_label in id2label_mapping.keys():
112
+ if model_label.upper() == label.upper():
113
+ return model_label, label
114
+ return None, label
115
+
116
+
117
+ def text_classification_map_model_and_dataset_labels(id2label, dataset_features):
118
+ id2label_mapping = {id2label[k]: None for k in id2label.keys()}
119
+ dataset_labels = None
120
+ for feature in dataset_features.values():
121
+ if not isinstance(feature, datasets.ClassLabel):
122
+ continue
123
+ if len(feature.names) != len(id2label_mapping.keys()):
124
+ continue
125
+
126
+ dataset_labels = feature.names
127
+ # Try to match labels
128
+ for label in feature.names:
129
+ if label in id2label_mapping.keys():
130
+ model_label = label
131
+ else:
132
+ # Try a case-insensitive match
133
+ model_label, label = text_classification_match_label_case_insensitive(
134
+ id2label_mapping, label
135
+ )
136
+ if model_label is not None:
137
+ id2label_mapping[model_label] = label
138
+ else:
139
+ print(f"Label {label} is not found in model labels")
140
+
141
+ return id2label_mapping, dataset_labels
142
+
143
+
144
+ """
145
+ params:
146
+ column_mapping: dict
147
+ example: {
148
+ "text": "sentences",
149
+ "label": {
150
+ "label0": "LABEL_0",
151
+ "label1": "LABEL_1"
152
+ }
153
+ }
154
+ ppl: pipeline
155
+ """
156
+
157
+
158
+ def check_column_mapping_keys_validity(column_mapping, ppl):
159
+ # collect the user-side and model-side labels from the mapping pairs
160
+ column_mapping = json.loads(column_mapping)
161
+ if "data" not in column_mapping.keys():
162
+ return True
163
+ user_labels = set([pair[0] for pair in column_mapping["data"]])
164
+ model_labels = set([pair[1] for pair in column_mapping["data"]])
165
+
166
+ id2label = ppl.model.config.id2label
167
+ original_labels = set(id2label.values())
168
+
169
+ return user_labels == model_labels == original_labels
170
+
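A sketch of a `column_mapping` that passes this check, assuming the pipeline's `id2label` values are NEGATIVE/POSITIVE (each `data` pair is user label, model label):

mapping = '{"data": [["NEGATIVE", "NEGATIVE"], ["POSITIVE", "POSITIVE"]]}'
# check_column_mapping_keys_validity(mapping, ppl) is True only when the
# user-side labels, the model-side labels, and the values of
# ppl.model.config.id2label are all the same set.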
171
+
172
+ """
173
+ params:
174
+ column_mapping: dict
175
+ dataset_features: dict
176
+ example: {
177
+ 'text': Value(dtype='string', id=None),
178
+ 'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
179
+ }
180
+ """
181
+
182
+
183
+ def infer_text_input_column(column_mapping, dataset_features):
184
+ # Check whether we need to infer the text input column
185
+ infer_text_input_column = True
186
+ feature_map_df = None
187
+
188
+ if "text" in column_mapping.keys():
189
+ dataset_text_column = column_mapping["text"]
190
+ if dataset_text_column in dataset_features.keys():
191
+ infer_text_input_column = False
192
+ else:
193
+ logging.warning(f"Provided {dataset_text_column} is not in Dataset columns")
194
+
195
+ if infer_text_input_column:
196
+ # Try to retrieve one
197
+ candidates = [
198
+ f for f in dataset_features if dataset_features[f].dtype == "string"
199
+ ]
200
+ if len(candidates) > 0:
201
+ logging.debug(f"Candidates are {candidates}")
202
+ column_mapping["text"] = candidates[0]
203
+ feature_map_df = pd.DataFrame(
204
+ {"Dataset Features": [candidates[0]], "Model Input Features": ["text"]}
205
+ )
206
+
207
+ return column_mapping, feature_map_df
208
+
209
+
210
+ """
211
+ params:
212
+ column_mapping: dict
213
+ id2label_mapping: dict
214
+ example:
215
+ id2label_mapping: {
216
+ 'negative': 'negative',
217
+ 'neutral': 'neutral',
218
+ 'positive': 'positive'
219
+ }
220
+ """
221
+
222
+
223
+ def infer_output_label_column(
224
+ column_mapping, id2label_mapping, id2label, dataset_labels
225
+ ):
226
+ # Check whether we need to infer the output label column
227
+ if "data" in column_mapping.keys():
228
+ if isinstance(column_mapping["data"], list):
229
+ # Use the column mapping passed by user
230
+ for user_label, model_label in column_mapping["data"]:
231
+ id2label_mapping[model_label] = user_label
232
+ elif None in id2label_mapping.values():
233
+ column_mapping["label"] = {i: None for i in id2label.keys()}
234
+ return column_mapping, None
235
+
236
+ if "data" not in column_mapping.keys():
237
+ # Column mapping should contain original model labels
238
+ column_mapping["label"] = {
239
+ str(i): id2label_mapping[label]
240
+ for i, label in zip(id2label.keys(), dataset_labels)
241
+ }
242
+
243
+ id2label_df = pd.DataFrame(
244
+ {
245
+ "Dataset Labels": dataset_labels,
246
+ "Model Prediction Labels": [
247
+ id2label_mapping[label] for label in dataset_labels
248
+ ],
249
+ }
250
+ )
251
+
252
+ return column_mapping, id2label_df
253
+
254
+
255
+ def check_dataset_features_validity(d_id, config, split):
256
+ # We assume dataset is ok here
257
+ ds = datasets.load_dataset(d_id, config)[split]
258
+ try:
259
+ dataset_features = ds.features
260
+ except AttributeError:
261
+ # Dataset does not have features, need to provide everything
262
+ return None, None
263
+ # Load dataset as DataFrame
264
+ df = ds.to_pandas()
265
+
266
+ return df, dataset_features
267
+
268
+ def select_the_first_string_column(ds):
269
+ for feature in ds.features.keys():
270
+ if isinstance(ds[0][feature], str):
271
+ return feature
272
+ return None
273
+
274
+
275
+ def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split):
276
+ # get a sample prediction from the model on the dataset
277
+ prediction_input = None
278
+ prediction_result = None
279
+ try:
280
+ # Use the first item to test prediction
281
+ ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
282
+ if "text" not in ds.features.keys():
283
+ # Dataset does not have text column
284
+ prediction_input = ds[0][select_the_first_string_column(ds)]
285
+ else:
286
+ prediction_input = ds[0]["text"]
287
+
288
+ hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
289
+ payload = {"inputs": prediction_input, "options": {"use_cache": True}}
290
+ results = hf_inference_api(model_id, hf_token, payload)
291
+
292
+ if isinstance(results, dict) and "error" in results.keys():
293
+ if "estimated_time" in results.keys():
294
+ return prediction_input, HuggingFaceInferenceAPIResponse(
295
+ f"Estimated time: {int(results['estimated_time'])}s. Please try again later.")
296
+ return prediction_input, HuggingFaceInferenceAPIResponse(
297
+ f"Inference Error: {results['error']}.")
298
+
299
+ while isinstance(results, list):
300
+ if isinstance(results[0], dict):
301
+ break
302
+ results = results[0]
303
+ prediction_result = {
304
+ f'{result["label"]}': result["score"] for result in results
305
+ }
306
+ except Exception as e:
307
+ # inference api prediction failed, show the error message
308
+ logger.error(f"Get example prediction failed {e}")
309
+ return prediction_input, None
310
+
311
+ return prediction_input, prediction_result
312
+
313
+
314
+ def get_sample_prediction(ppl, df, column_mapping, id2label_mapping):
315
+ # get a sample prediction from the model on the dataset
316
+ prediction_input = None
317
+ prediction_result = None
318
+ try:
319
+ # Use the first item to test prediction
320
+ prediction_input = df.head(1).at[0, column_mapping["text"]]
321
+ results = ppl({"text": prediction_input}, top_k=None)
322
+ prediction_result = {
323
+ f'{result["label"]}': result["score"] for result in results
324
+ }
325
+ except Exception:
326
+ # Pipeline prediction failed, need to provide labels
327
+ return prediction_input, None
328
+
329
+ # Display results in original label and mapped label
330
+ prediction_result = {
331
+ f'{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result[
332
+ "score"
333
+ ]
334
+ for result in results
335
+ }
336
+ return prediction_input, prediction_result
337
+
338
+
339
+ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
340
+ # load dataset as pd DataFrame
341
+ # get features column from dataset
342
+ df, dataset_features = check_dataset_features_validity(d_id, config, split)
343
+
344
+ column_mapping, feature_map_df = infer_text_input_column(
345
+ column_mapping, dataset_features
346
+ )
347
+ if feature_map_df is None:
348
+ # dataset does not have any features
349
+ return None, None, None, None, None
350
+
351
+ # Retrieve all labels
352
+ id2label = ppl.model.config.id2label
353
+
354
+ # Infer labels
355
+ id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(
356
+ id2label, dataset_features
357
+ )
358
+ column_mapping, id2label_df = infer_output_label_column(
359
+ column_mapping, id2label_mapping, id2label, dataset_labels
360
+ )
361
+ if id2label_df is None:
362
+ # unable to infer the output label column
363
+ return column_mapping, None, None, None, feature_map_df
364
+
365
+ # Get a sample prediction
366
+ prediction_input, prediction_result = get_sample_prediction(
367
+ ppl, df, column_mapping, id2label_mapping
368
+ )
369
+ if prediction_result is None:
370
+ # unable to get a sample prediction
371
+ return column_mapping, prediction_input, None, id2label_df, feature_map_df
372
+
373
+ return (
374
+ column_mapping,
375
+ prediction_input,
376
+ prediction_result,
377
+ id2label_df,
378
+ feature_map_df,
379
+ )
380
+
381
+ def strip_model_id_from_url(model_id):
382
+ if model_id.startswith("https://huggingface.co/"):
383
+ return "/".join(model_id.split("/")[-2])
384
+ return model_id
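Both the URL and the bare form normalize to the same repo id (illustrative id):

strip_model_id_from_url("https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest")
# -> "cardiffnlp/twitter-roberta-base-sentiment-latest"
strip_model_id_from_url("cardiffnlp/twitter-roberta-base-sentiment-latest")
# -> returned unchanged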
text_classification_ui_helpers.py ADDED
@@ -0,0 +1,351 @@
1
+ import collections
2
+ import logging
3
+ import threading
4
+ import uuid
5
+
6
+ import datasets
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+ import leaderboard
11
+ from io_utils import read_column_mapping, write_column_mapping
12
+ from run_jobs import save_job_to_pipe
13
+ from text_classification import (
14
+ strip_model_id_from_url,
15
+ check_model_task,
16
+ preload_hf_inference_api,
17
+ get_example_prediction,
18
+ get_labels_and_features_from_dataset,
19
+ HuggingFaceInferenceAPIResponse,
20
+ )
21
+ from wordings import (
22
+ CHECK_CONFIG_OR_SPLIT_RAW,
23
+ CONFIRM_MAPPING_DETAILS_FAIL_RAW,
24
+ MAPPING_STYLED_ERROR_WARNING,
25
+ NOT_TEXT_CLASSIFICATION_MODEL_RAW,
26
+ get_styled_input,
27
+ )
28
+
29
+ MAX_LABELS = 40
30
+ MAX_FEATURES = 20
31
+
32
+ ds_dict = None
33
+ ds_config = None
34
+
35
+ def get_related_datasets_from_leaderboard(model_id):
36
+ records = leaderboard.records
37
+ model_id = strip_model_id_from_url(model_id)
38
+ model_records = records[records["model_id"] == model_id]
39
+ datasets_unique = list(model_records["dataset_id"].unique())
40
+
41
+ if len(datasets_unique) == 0:
42
+ return gr.update(choices=[], value="")
43
+
44
+ return gr.update(choices=datasets_unique, value=datasets_unique[0])
45
+
46
+
47
+ logger = logging.getLogger(__file__)
48
+
49
+
50
+ def check_dataset(dataset_id):
51
+ logger.info(f"Loading {dataset_id}")
52
+ try:
53
+ configs = datasets.get_dataset_config_names(dataset_id)
54
+ if len(configs) == 0:
55
+ return (
56
+ gr.update(),
57
+ gr.update(),
58
+ ""
59
+ )
60
+ splits = list(
61
+ datasets.load_dataset(
62
+ dataset_id, configs[0]
63
+ ).keys()
64
+ )
65
+ return (
66
+ gr.update(choices=configs, value=configs[0], visible=True),
67
+ gr.update(choices=splits, value=splits[0], visible=True),
68
+ ""
69
+ )
70
+ except Exception as e:
71
+ logger.warning(f"Check your dataset {dataset_id}: {e}")
72
+ return (
73
+ gr.update(),
74
+ gr.update(),
75
+ ""
76
+ )
77
+
78
+
79
+
80
+ def write_column_mapping_to_config(uid, *labels):
81
+ # TODO: Substitute 'text' with more features for zero-shot
82
+ # we are not using ds features because we only support "text" for now
83
+ all_mappings = read_column_mapping(uid)
84
+
85
+ if labels is None:
86
+ return
87
+ all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
88
+ all_mappings = export_mappings(
89
+ all_mappings,
90
+ "features",
91
+ ["text"],
92
+ labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)],
93
+ )
94
+
95
+ write_column_mapping(all_mappings, uid)
96
+
97
+
98
+ def export_mappings(all_mappings, key, subkeys, values):
99
+ if key not in all_mappings.keys():
100
+ all_mappings[key] = dict()
101
+ if subkeys is None:
102
+ subkeys = list(all_mappings[key].keys())
103
+
104
+ if not subkeys or not values:
105
+ logging.debug(f"subkeys or values are empty for {key}")
106
+ return all_mappings
107
+
108
+ for i, subkey in enumerate(subkeys):
109
+ if subkey:
110
+ all_mappings[key][subkey] = values[i % len(values)]
111
+ return all_mappings
112
+
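A minimal sketch of `export_mappings` with illustrative labels; `values[i % len(values)]` wraps around when the two lists differ in length:

export_mappings({}, "labels", ["neg", "pos"], ["NEGATIVE", "POSITIVE"])
# -> {"labels": {"neg": "NEGATIVE", "pos": "POSITIVE"}}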
113
+
114
+ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
115
+ all_mappings = read_column_mapping(uid)
116
+ # For flattened raw datasets with no labels
117
+ # check if there are shared labels between model and dataset
118
+ shared_labels = set(model_labels).intersection(set(ds_labels))
119
+ if shared_labels:
120
+ ds_labels = list(shared_labels)
121
+ if len(ds_labels) > MAX_LABELS:
122
+ ds_labels = ds_labels[:MAX_LABELS]
123
+ gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
124
+
125
+ # sort labels to make sure the order is consistent
126
+ # prediction gives the order based on probability
127
+ ds_labels.sort()
128
+ model_labels.sort()
129
+
130
+ labels = [
131
+ gr.Dropdown(
132
+ label=f"{label}",
133
+ choices=model_labels,
134
+ value=model_labels[i % len(model_labels)],
135
+ interactive=True,
136
+ visible=True,
137
+ )
138
+ for i, label in enumerate(ds_labels)
139
+ ]
140
+ labels += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(labels))]
141
+ all_mappings = export_mappings(all_mappings, "labels", ds_labels, model_labels)
142
+
143
+ # TODO: Substitute 'text' with more features for zero-shot
144
+ features = [
145
+ gr.Dropdown(
146
+ label=f"{feature}",
147
+ choices=ds_features,
148
+ value=ds_features[0],
149
+ interactive=True,
150
+ visible=True,
151
+ )
152
+ for feature in ["text"]
153
+ ]
154
+ features += [
155
+ gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
156
+ ]
157
+ all_mappings = export_mappings(all_mappings, "features", ["text"], ds_features)
158
+ write_column_mapping(all_mappings, uid)
159
+
160
+ return labels + features
161
+
162
+
163
+ def precheck_model_ds_enable_example_btn(
164
+ model_id, dataset_id, dataset_config, dataset_split
165
+ ):
166
+ model_id = strip_model_id_from_url(model_id)
167
+ model_task = check_model_task(model_id)
168
+ preload_hf_inference_api(model_id)
169
+ if model_task is None or model_task != "text-classification":
170
+ gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
171
+ return (gr.update(), gr.update(), "")
172
+
173
+ if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
174
+ return (gr.update(), gr.update(), "")
175
+
176
+ try:
177
+ ds = datasets.load_dataset(dataset_id, dataset_config)
178
+ df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
179
+ ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
180
+
181
+ if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
182
+ gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
183
+ return (gr.update(interactive=False), gr.update(value=df, visible=True), "")
184
+
185
+ return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
186
+ except Exception as e:
187
+ # Config or split wrong
188
+ gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
189
+ return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
190
+
191
+
192
+ def align_columns_and_show_prediction(
193
+ model_id,
194
+ dataset_id,
195
+ dataset_config,
196
+ dataset_split,
197
+ uid,
198
+ run_inference,
199
+ inference_token,
200
+ ):
201
+ model_id = strip_model_id_from_url(model_id)
202
+ model_task = check_model_task(model_id)
203
+ if model_task is None or model_task != "text-classification":
204
+ gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
205
+ return (
206
+ gr.update(visible=False),
207
+ gr.update(visible=False),
208
+ gr.update(visible=False, open=False),
209
+ gr.update(interactive=False),
210
+ "",
211
+ *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
212
+ )
213
+
214
+ dropdown_placement = [
215
+ gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
216
+ ]
217
+
218
+ prediction_input, prediction_response = get_example_prediction(
219
+ model_id, dataset_id, dataset_config, dataset_split
220
+ )
221
+
222
+ if prediction_input is None or prediction_response is None:
223
+ return (
224
+ gr.update(visible=False),
225
+ gr.update(visible=False),
226
+ gr.update(visible=False, open=False),
227
+ gr.update(interactive=False),
228
+ "",
229
+ *dropdown_placement,
230
+ )
231
+
232
+ if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
233
+ return (
234
+ gr.update(visible=False),
235
+ gr.update(visible=False),
236
+ gr.update(visible=False, open=False),
237
+ gr.update(interactive=False),
238
+ f"Hugging Face Inference API is loading your model. {prediction_response.message}",
239
+ *dropdown_placement,
240
+ )
241
+
242
+ model_labels = list(prediction_response.keys())
243
+
244
+ ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
245
+ ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
246
+
247
+ # when dataset does not have labels or features
248
+ if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
249
+ gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
250
+ return (
251
+ gr.update(visible=False),
252
+ gr.update(visible=False),
253
+ gr.update(visible=False, open=False),
254
+ gr.update(interactive=False),
255
+ "",
256
+ *dropdown_placement,
257
+ )
258
+
259
+ column_mappings = list_labels_and_features_from_dataset(
260
+ ds_labels,
261
+ ds_features,
262
+ model_labels,
263
+ uid,
264
+ )
265
+
266
+ # when labels or features are not aligned
267
+ # show the manual column mapping
268
+ if (
269
+ collections.Counter(model_labels) != collections.Counter(ds_labels)
270
+ or ds_features[0] != "text"
271
+ ):
272
+ return (
273
+ gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
274
+ gr.update(visible=False),
275
+ gr.update(visible=True, open=True),
276
+ gr.update(interactive=(run_inference and inference_token != "")),
277
+ "",
278
+ *column_mappings,
279
+ )
280
+
281
+ return (
282
+ gr.update(value=get_styled_input(prediction_input), visible=True),
283
+ gr.update(value=prediction_response, visible=True),
284
+ gr.update(visible=True, open=False),
285
+ gr.update(interactive=(run_inference and inference_token != "")),
286
+ "",
287
+ *column_mappings,
288
+ )
289
+
290
+
291
+ def check_column_mapping_keys_validity(all_mappings):
292
+ if all_mappings is None:
293
+ gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
294
+ return (gr.update(interactive=True), gr.update(visible=False))
295
+
296
+ if "labels" not in all_mappings.keys():
297
+ gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
298
+ return (gr.update(interactive=True), gr.update(visible=False))
299
+
300
+
301
+ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
302
+ label_mapping = {}
303
+ if len(all_mappings["labels"].keys()) != len(ds_labels):
304
+ gr.Warning("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
305
+
306
+ if len(all_mappings["features"].keys()) != len(ds_features):
307
+ gr.Warning("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
308
+
309
+ for i, label in enumerate(ds_labels):
310
+ # align the saved labels with dataset labels order
311
+ label_mapping.update({str(i): all_mappings["labels"][label]})
312
+
313
+ if "features" not in all_mappings.keys():
314
+ gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
315
+ feature_mapping = all_mappings.get("features", {})
316
+ return label_mapping, feature_mapping
317
+
318
+
319
+ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
320
+ all_mappings = read_column_mapping(uid)
321
+ check_column_mapping_keys_validity(all_mappings)
322
+
323
+ # get ds labels and features again for alignment
324
+ ds = datasets.load_dataset(d_id, config)[split]
325
+ ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
326
+ label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
327
+
328
+ eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
329
+ save_job_to_pipe(
330
+ uid,
331
+ (
332
+ m_id,
333
+ d_id,
334
+ config,
335
+ split,
336
+ inference,
337
+ inference_token,
338
+ uid,
339
+ label_mapping,
340
+ feature_mapping,
341
+ ),
342
+ eval_str,
343
+ threading.Lock(),
344
+ )
345
+ gr.Info("Your evaluation has been submitted")
346
+
347
+ return (
348
+ gr.update(interactive=False), # Submit button
349
+ gr.update(lines=5, visible=True, interactive=False),
350
+ uuid.uuid4(), # Allocate a new uuid
351
+ )
tmp/.gitkeep ADDED
File without changes
tmp/venvs/.gitkeep ADDED
File without changes
utils.py ADDED
@@ -0,0 +1,29 @@
1
+ import sys
2
+
3
+ import yaml
4
+
5
+
6
+ # read scanners from yaml file
7
+ # return a list of scanners
8
+ def read_scanners(path):
9
+ scanners = []
10
+ with open(path, "r") as f:
11
+ config = yaml.load(f, Loader=yaml.FullLoader)
12
+ scanners = config.get("detectors", None)
13
+ return scanners
14
+
15
+
16
+ # convert a list of scanners to yaml file
17
+ def write_scanners(scanners):
18
+ with open("./scan_config.yaml", "w") as f:
19
+ # save scanners to detectors in yaml
20
+ yaml.dump({"detectors": scanners}, f)
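A sketch of the YAML shape these two helpers exchange; the detector names below are placeholders, not a definitive list:

# scan_config.yaml (assumed shape):
#   detectors:
#     - performance
#     - robustness
scanners = read_scanners("./scan_config.yaml")  # -> ["performance", "robustness"]
write_scanners(scanners)  # writes the same structure back under "detectors"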
21
+
22
+
23
+ # convert column mapping dataframe to json
24
+ def convert_column_mapping_to_json(df, label=""):
25
+ column_mapping = {}
26
+ column_mapping[label] = []
27
+ for _, row in df.iterrows():
28
+ column_mapping[label].append(row.tolist())
29
+ return column_mapping
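Illustratively, each DataFrame row flattens to one pair in the resulting list (the column names below are made up):

import pandas as pd

df = pd.DataFrame({"dataset": ["neg", "pos"], "model": ["NEGATIVE", "POSITIVE"]})
convert_column_mapping_to_json(df, label="data")
# -> {"data": [["neg", "NEGATIVE"], ["pos", "POSITIVE"]]}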
validate_queue.py ADDED
@@ -0,0 +1,23 @@
1
+ import random
2
+ import time
3
+
4
+ import gradio as gr
5
+
6
+
7
+ def sleep_a_while():
8
+ seconds = random.randint(5, 10)
9
+ print(f"Working for {seconds} seconds")
10
+ start = time.time()
11
+ while start + seconds > time.time():  # busy-wait to keep the worker occupied
12
+ continue
13
+ return str(seconds)
14
+
15
+
16
+ with gr.Blocks() as iface:
17
+ text = gr.Textbox(label="Slept second")
18
+
19
+ run_btn = gr.Button("Run")
20
+ run_btn.click(sleep_a_while, queue=False, outputs=text, concurrency_limit=1)
21
+
22
+ if __name__ == "__main__":
23
+ iface.queue(max_size=2, default_concurrency_limit=2).launch()
wordings.py ADDED
@@ -0,0 +1,67 @@
1
+ INTRODUCTION_MD = """
2
+ <h1 style="text-align: center;">
3
+ 🐢Giskard Evaluator
4
+ </h1>
5
+ Welcome to the Giskard Evaluator Space! Get your report immediately by simply entering your model id and dataset id below. Follow the report's leads and improve your model in no time.
6
+ """
7
+ CONFIRM_MAPPING_DETAILS_MD = """
8
+ <h1 style="text-align: center;">
9
+ Confirm Pre-processing Details
10
+ </h1>
11
+ Please confirm the pre-processing details below. Use the <b>dropdown</b> menus to align your model's column names with your dataset's. If you are not sure, please double check your model and dataset.
12
+ """
13
+ CONFIRM_MAPPING_DETAILS_FAIL_MD = """
14
+ <h1 style="text-align: center;">
15
+ Confirm Pre-processing Details
16
+ </h1>
17
+ Sorry, we cannot align the input/output of your dataset with the model. <b>Please double check your model and dataset.</b>
18
+ """
19
+
20
+ CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
21
+ Sorry, we cannot align the input/output of your dataset with the model. Please double check your model and dataset.
22
+ """
23
+
24
+ CHECK_CONFIG_OR_SPLIT_RAW = """
25
+ Please check your dataset config or split.
26
+ """
27
+
28
+ PREDICTION_SAMPLE_MD = """
29
+ <h1 style="text-align: center;">
30
+ Model Prediction Sample
31
+ </h1>
32
+ Here is a sample prediction from your model based on your dataset.
33
+ """
34
+
35
+ MAPPING_STYLED_ERROR_WARNING = """
36
+ <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
37
+ Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
38
+ </h3>
39
+ """
40
+
41
+ NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
42
+ Your model is not a text classification model. This page is designed specifically for evaluating text classification models.
43
+ """
44
+
45
+ USE_INFERENCE_API_TIP = """
46
+ We recommend using the
47
+ <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
48
+ Hugging Face Inference API
49
+ </a>
50
+ for the evaluation,
51
+ which requires your <a href="https://huggingface.co/settings/tokens">HF token</a>.
52
+ <br/>
53
+ Otherwise, an
54
+ <a href="https://huggingface.co/docs/transformers/main_classes/pipelines#transformers.TextClassificationPipeline">
55
+ HF pipeline
56
+ </a>
57
+ will be created and run in this Space. This takes longer to produce results.
58
+ <br/>
59
+ <b>
60
+ Do not worry, your HF token is only used in this Space for your evaluation.
61
+ </b>
62
+ """
63
+
64
+ def get_styled_input(input_text):
65
+ return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
66
+ Sample input: {input_text}
67
+ </h3>"""