ZeroCommand committed on
Commit
9b20db6
2 Parent(s): 49f5e28 14bc302

add login button

Browse files
README.md CHANGED
@@ -7,6 +7,13 @@ sdk: gradio
7
  sdk_version: 4.7.1
8
  app_file: app.py
9
  pinned: false
 
 
 
 
 
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
7
  sdk_version: 4.7.1
8
  app_file: app.py
9
  pinned: false
10
+
11
+ hf_oauth: true
12
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
13
+ hf_oauth_expiration_minutes: 480
14
+ # optional, see "Scopes" below. "openid profile" is always included.
15
+ hf_oauth_scopes:
16
+ - inference-api
17
  ---
18
 
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app_debug.py CHANGED
@@ -3,12 +3,12 @@ from os.path import isfile, join
3
  import html
4
 
5
  import gradio as gr
6
-
7
- import utils.pipe as pipe
8
- from utils.io_utils import get_logs_file
9
 
10
  LOG_PATH = "./tmp"
11
- CONFIG_PATH = "./cicd/configs/"
12
  MAX_FILES_NUM = 20
13
 
14
 
@@ -69,17 +69,19 @@ def get_queue_status():
69
 
70
 
71
  def get_demo():
 
 
72
  with gr.Row():
73
  gr.HTML(
74
  value=get_queue_status,
75
  every=5,
76
  )
77
- with gr.Accordion(label="Log Files", open=False):
78
- with gr.Row():
79
- gr.Files(value=get_log_files, label="Log Files", every=10)
80
  with gr.Row():
81
  gr.Textbox(
82
  value=get_logs_file, every=0.5, lines=10, visible=True, label="Current Log File"
83
  )
 
 
84
  with gr.Accordion(label="Config Files", open=False):
85
  gr.Files(value=get_config_files, label="Config Files", every=10)
 
3
  import html
4
 
5
  import gradio as gr
6
+ import os
7
+ import pipe
8
+ from io_utils import get_logs_file
9
 
10
  LOG_PATH = "./tmp"
11
+ CONFIG_PATH = "./cicd/configs/submitted/"
12
  MAX_FILES_NUM = 20
13
 
14
 
 
69
 
70
 
71
  def get_demo():
72
+ if not os.path.exists(CONFIG_PATH):
73
+ os.makedirs(CONFIG_PATH)
74
  with gr.Row():
75
  gr.HTML(
76
  value=get_queue_status,
77
  every=5,
78
  )
79
+ with gr.Accordion(label="Log Files", open=True):
 
 
80
  with gr.Row():
81
  gr.Textbox(
82
  value=get_logs_file, every=0.5, lines=10, visible=True, label="Current Log File"
83
  )
84
+ with gr.Row():
85
+ gr.Files(value=get_log_files, label="Log Files", every=10)
86
  with gr.Accordion(label="Config Files", open=False):
87
  gr.Files(value=get_config_files, label="Config Files", every=10)
app_leaderboard.py CHANGED
@@ -88,11 +88,29 @@ def get_demo(leaderboard_tab):
88
  dataset_ids = get_dataset_ids(records)
89
 
90
  column_names = records.columns.tolist()
 
 
91
  default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
92
  default_df = records[default_columns] # extract columns selected
93
  types = get_types(default_df)
94
  display_df = get_display_df(default_df) # the styled dataframe to display
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  with gr.Row():
97
  task_select = gr.Dropdown(
98
  label="Task",
@@ -110,42 +128,35 @@ def get_demo(leaderboard_tab):
110
  interactive=True,
111
  )
112
 
113
- with gr.Row():
114
- columns_select = gr.CheckboxGroup(
115
- label="Show columns",
116
- choices=column_names,
117
- value=default_columns,
118
- interactive=True,
119
- )
120
-
121
  with gr.Row():
122
  leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
123
 
124
- def update_leaderboard_records(model_id, dataset_id, columns, task):
125
  global update_time
126
  if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
127
  return gr.update()
128
  update_time = datetime.datetime.now()
129
  logger.info("Updating leaderboard records")
130
  leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
131
- return filter_table(model_id, dataset_id, columns, task)
132
 
133
  leaderboard_tab.select(
134
  fn=update_leaderboard_records,
135
- inputs=[model_select, dataset_select, columns_select, task_select],
136
  outputs=[leaderboard_df])
137
 
138
  @gr.on(
139
  triggers=[
140
  model_select.change,
141
  dataset_select.change,
142
- columns_select.change,
 
143
  task_select.change,
144
  ],
145
- inputs=[model_select, dataset_select, columns_select, task_select],
146
  outputs=[leaderboard_df],
147
  )
148
- def filter_table(model_id, dataset_id, columns, task):
149
  logger.info("Filtering leaderboard records")
150
  records = leaderboard.records
151
  # filter the table based on task
@@ -156,8 +167,9 @@ def get_demo(leaderboard_tab):
156
  if dataset_id and dataset_id != "Any":
157
  df = df[(df["dataset_id"] == dataset_id)]
158
 
159
- # filter the table based on the columns
160
- df = df[columns]
 
161
  types = get_types(df)
162
  display_df = get_display_df(df)
163
  return gr.update(value=display_df, datatype=types, interactive=False)
 
88
  dataset_ids = get_dataset_ids(records)
89
 
90
  column_names = records.columns.tolist()
91
+ issue_columns = column_names[:11]
92
+ info_columns = column_names[15:]
93
  default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
94
  default_df = records[default_columns] # extract columns selected
95
  types = get_types(default_df)
96
  display_df = get_display_df(default_df) # the styled dataframe to display
97
 
98
+ with gr.Row():
99
+ with gr.Column():
100
+ issue_columns_select = gr.CheckboxGroup(
101
+ label="Issue Columns",
102
+ choices=issue_columns,
103
+ value=[],
104
+ interactive=True,
105
+ )
106
+ with gr.Column():
107
+ info_columns_select = gr.CheckboxGroup(
108
+ label="Info Columns",
109
+ choices=info_columns,
110
+ value=default_columns,
111
+ interactive=True,
112
+ )
113
+
114
  with gr.Row():
115
  task_select = gr.Dropdown(
116
  label="Task",
 
128
  interactive=True,
129
  )
130
 
 
 
 
 
 
 
 
 
131
  with gr.Row():
132
  leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
133
 
134
+ def update_leaderboard_records(model_id, dataset_id, issue_columns, info_columns, task):
135
  global update_time
136
  if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
137
  return gr.update()
138
  update_time = datetime.datetime.now()
139
  logger.info("Updating leaderboard records")
140
  leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
141
+ return filter_table(model_id, dataset_id, issue_columns, info_columns, task)
142
 
143
  leaderboard_tab.select(
144
  fn=update_leaderboard_records,
145
+ inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
146
  outputs=[leaderboard_df])
147
 
148
  @gr.on(
149
  triggers=[
150
  model_select.change,
151
  dataset_select.change,
152
+ issue_columns_select.change,
153
+ info_columns_select.change,
154
  task_select.change,
155
  ],
156
+ inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
157
  outputs=[leaderboard_df],
158
  )
159
+ def filter_table(model_id, dataset_id, issue_columns, info_columns, task):
160
  logger.info("Filtering leaderboard records")
161
  records = leaderboard.records
162
  # filter the table based on task
 
167
  if dataset_id and dataset_id != "Any":
168
  df = df[(df["dataset_id"] == dataset_id)]
169
 
170
+ # filter the table based on the columns
171
+ issue_columns.sort()
172
+ df = df[info_columns + issue_columns]
173
  types = get_types(df)
174
  display_df = get_display_df(df)
175
  return gr.update(value=display_df, datatype=types, interactive=False)
app_legacy.py CHANGED
@@ -376,7 +376,7 @@ def get_demo():
376
  selected = read_scanners("./config.yaml")
377
  scan_config = selected + ["data_leakage"]
378
  scanners = gr.CheckboxGroup(
379
- choices=scan_config, value=selected, label="Scan Settings", visible=True
380
  )
381
 
382
  with gr.Row():
 
376
  selected = read_scanners("./config.yaml")
377
  scan_config = selected + ["data_leakage"]
378
  scanners = gr.CheckboxGroup(
379
+ choices=scan_config, value=selected, visible=True
380
  )
381
 
382
  with gr.Row():
app_text_classification.py CHANGED
@@ -6,6 +6,7 @@ from utils.io_utils import read_scanners, write_scanners
6
  from utils.ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
 
9
  check_dataset,
10
  show_hf_token_info,
11
  precheck_model_ds_enable_example_btn,
@@ -16,12 +17,11 @@ from utils.ui_helpers import (
16
  )
17
 
18
  import logging
19
- from utils.wordings import (
20
  CONFIRM_MAPPING_DETAILS_MD,
21
  INTRODUCTION_MD,
22
- USE_INFERENCE_API_TIP,
23
  CHECK_LOG_SECTION_RAW,
24
- HF_TOKEN_INVALID_STYLED
25
  )
26
 
27
  MAX_LABELS = 40
@@ -34,6 +34,8 @@ logger = logging.getLogger(__name__)
34
  def get_demo():
35
  with gr.Row():
36
  gr.Markdown(INTRODUCTION_MD)
 
 
37
  uid_label = gr.Textbox(
38
  label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
39
  )
@@ -58,7 +60,7 @@ def get_demo():
58
  with gr.Row():
59
  first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
60
  with gr.Row():
61
- loading_status = gr.HTML(visible=True)
62
  with gr.Row():
63
  example_btn = gr.Button(
64
  "Validate Model & Dataset",
@@ -66,11 +68,13 @@ def get_demo():
66
  variant="primary",
67
  interactive=False,
68
  )
69
-
70
  with gr.Row():
71
- example_input = gr.HTML(visible=False)
 
 
72
  with gr.Row():
73
- example_prediction = gr.Label(label="Model Prediction Sample", visible=False)
 
74
 
75
  with gr.Row():
76
  with gr.Accordion(
@@ -89,27 +93,8 @@ def get_demo():
89
  for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
90
  column_mappings.append(gr.Dropdown(visible=False))
91
 
92
- with gr.Accordion(label="Model Wrap Advance Config", open=True):
93
- gr.HTML(USE_INFERENCE_API_TIP)
94
-
95
- run_inference = gr.Checkbox(value=True, label="Run with Inference API")
96
- inference_token = gr.Textbox(
97
- placeholder="hf-xxxxxxxxxxxxxxxxxxxx",
98
- value="",
99
- label="HF Token for Inference API",
100
- visible=True,
101
- interactive=True,
102
- )
103
- inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
104
-
105
- inference_token.change(
106
- fn=show_hf_token_info,
107
- inputs=[inference_token],
108
- outputs=[inference_token_info],
109
- )
110
-
111
- with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
112
- scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
113
 
114
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
115
  def get_scanners(uid):
@@ -117,7 +102,16 @@ def get_demo():
117
  # we remove data_leakage from the default scanners
118
  # Reason: data_leakage barely raises any issues and takes too many requests
119
  # when using inference API, causing rate limit error
120
- scan_config = selected + ["data_leakage"]
 
 
 
 
 
 
 
 
 
121
  return gr.update(
122
  choices=scan_config, value=selected, label="Scan Settings", visible=True
123
  )
@@ -147,16 +141,24 @@ def get_demo():
147
  inputs=[model_id_input],
148
  outputs=[dataset_id_input],
149
  ).then(
150
- fn=check_dataset,
151
- inputs=[dataset_id_input],
152
- outputs=[dataset_config_input, dataset_split_input, loading_status]
153
  )
154
 
155
  gr.on(
156
- triggers=[dataset_id_input.change],
157
  fn=check_dataset,
158
  inputs=[dataset_id_input],
159
- outputs=[dataset_config_input, dataset_split_input, loading_status]
 
 
 
 
 
 
 
 
160
  )
161
 
162
  gr.on(
@@ -187,6 +189,7 @@ def get_demo():
187
  gr.on(
188
  triggers=[
189
  model_id_input.change,
 
190
  dataset_id_input.change,
191
  dataset_config_input.change,
192
  dataset_split_input.change,
@@ -198,7 +201,13 @@ def get_demo():
198
  dataset_config_input,
199
  dataset_split_input,
200
  ],
201
- outputs=[example_btn, first_line_ds, loading_status],
 
 
 
 
 
 
202
  )
203
 
204
  gr.on(
@@ -212,15 +221,14 @@ def get_demo():
212
  dataset_config_input,
213
  dataset_split_input,
214
  uid_label,
215
- run_inference,
216
- inference_token,
217
  ],
218
  outputs=[
 
219
  example_input,
220
  example_prediction,
221
  column_mapping_accordion,
222
  run_btn,
223
- loading_status,
224
  *column_mappings,
225
  ],
226
  )
@@ -235,24 +243,26 @@ def get_demo():
235
  dataset_id_input,
236
  dataset_config_input,
237
  dataset_split_input,
238
- run_inference,
239
- inference_token,
240
  uid_label,
241
  ],
242
- outputs=[run_btn, logs, uid_label],
 
 
 
 
 
 
 
 
243
  )
244
 
245
  gr.on(
246
  triggers=[
247
- run_inference.input,
248
- inference_token.input,
249
  scanners.input,
250
  ],
251
  fn=enable_run_btn,
252
  inputs=[
253
  uid_label,
254
- run_inference,
255
- inference_token,
256
  model_id_input,
257
  dataset_id_input,
258
  dataset_config_input,
@@ -266,8 +276,6 @@ def get_demo():
266
  fn=enable_run_btn,
267
  inputs=[
268
  uid_label,
269
- run_inference,
270
- inference_token,
271
  model_id_input,
272
  dataset_id_input,
273
  dataset_config_input,
 
6
  from utils.ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
9
+ get_dataset_splits,
10
  check_dataset,
11
  show_hf_token_info,
12
  precheck_model_ds_enable_example_btn,
 
17
  )
18
 
19
  import logging
20
+ from wordings import (
21
  CONFIRM_MAPPING_DETAILS_MD,
22
  INTRODUCTION_MD,
23
+ LOG_IN_TIPS,
24
  CHECK_LOG_SECTION_RAW,
 
25
  )
26
 
27
  MAX_LABELS = 40
 
34
  def get_demo():
35
  with gr.Row():
36
  gr.Markdown(INTRODUCTION_MD)
37
+ gr.HTML(LOG_IN_TIPS)
38
+ gr.LoginButton()
39
  uid_label = gr.Textbox(
40
  label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
41
  )
 
60
  with gr.Row():
61
  first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
62
  with gr.Row():
63
+ loading_dataset_info = gr.HTML(visible=True)
64
  with gr.Row():
65
  example_btn = gr.Button(
66
  "Validate Model & Dataset",
 
68
  variant="primary",
69
  interactive=False,
70
  )
 
71
  with gr.Row():
72
+ loading_validation = gr.HTML(visible=True)
73
+ with gr.Row():
74
+ validation_result = gr.HTML(visible=False)
75
  with gr.Row():
76
+ example_input = gr.Textbox(label="Example Input", visible=False, interactive=False)
77
+ example_prediction = gr.Label(label="Model Sample Prediction", visible=False)
78
 
79
  with gr.Row():
80
  with gr.Accordion(
 
93
  for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
94
  column_mappings.append(gr.Dropdown(visible=False))
95
 
96
+ with gr.Accordion(label="Scanner Advanced Config (optional)", open=False):
97
+ scanners = gr.CheckboxGroup(visible=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
100
  def get_scanners(uid):
 
102
  # we remove data_leakage from the default scanners
103
  # Reason: data_leakage barely raises any issues and takes too many requests
104
  # when using inference API, causing rate limit error
105
+ scan_config = [
106
+ "ethical_bias",
107
+ "text_perturbation",
108
+ "robustness",
109
+ "performance",
110
+ "underconfidence",
111
+ "overconfidence",
112
+ "spurious_correlation",
113
+ "data_leakage",
114
+ ]
115
  return gr.update(
116
  choices=scan_config, value=selected, label="Scan Settings", visible=True
117
  )
 
141
  inputs=[model_id_input],
142
  outputs=[dataset_id_input],
143
  ).then(
144
+ fn=check_dataset,
145
+ inputs=[dataset_id_input],
146
+ outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
147
  )
148
 
149
  gr.on(
150
+ triggers=[dataset_id_input.input, dataset_id_input.select],
151
  fn=check_dataset,
152
  inputs=[dataset_id_input],
153
+ outputs=[dataset_config_input, dataset_split_input, loading_dataset_info]
154
+ )
155
+
156
+ dataset_config_input.change(fn=get_dataset_splits, inputs=[dataset_id_input, dataset_config_input], outputs=[dataset_split_input])
157
+
158
+ gr.on(
159
+ triggers=[model_id_input.change, dataset_id_input.change, dataset_config_input.change],
160
+ fn=empty_column_mapping,
161
+ inputs=[uid_label]
162
  )
163
 
164
  gr.on(
 
189
  gr.on(
190
  triggers=[
191
  model_id_input.change,
192
+ model_id_input.input,
193
  dataset_id_input.change,
194
  dataset_config_input.change,
195
  dataset_split_input.change,
 
201
  dataset_config_input,
202
  dataset_split_input,
203
  ],
204
+ outputs=[
205
+ example_btn,
206
+ first_line_ds,
207
+ validation_result,
208
+ example_input,
209
+ example_prediction,
210
+ column_mapping_accordion,],
211
  )
212
 
213
  gr.on(
 
221
  dataset_config_input,
222
  dataset_split_input,
223
  uid_label,
 
 
224
  ],
225
  outputs=[
226
+ validation_result,
227
  example_input,
228
  example_prediction,
229
  column_mapping_accordion,
230
  run_btn,
231
+ loading_validation,
232
  *column_mappings,
233
  ],
234
  )
 
243
  dataset_id_input,
244
  dataset_config_input,
245
  dataset_split_input,
 
 
246
  uid_label,
247
  ],
248
+ outputs=[
249
+ run_btn,
250
+ logs,
251
+ uid_label,
252
+ validation_result,
253
+ example_input,
254
+ example_prediction,
255
+ column_mapping_accordion,
256
+ ],
257
  )
258
 
259
  gr.on(
260
  triggers=[
 
 
261
  scanners.input,
262
  ],
263
  fn=enable_run_btn,
264
  inputs=[
265
  uid_label,
 
 
266
  model_id_input,
267
  dataset_id_input,
268
  dataset_config_input,
 
276
  fn=enable_run_btn,
277
  inputs=[
278
  uid_label,
 
 
279
  model_id_input,
280
  dataset_id_input,
281
  dataset_config_input,
requirements.txt CHANGED
@@ -4,4 +4,6 @@ hf-transfer
4
  torch==2.0.1
5
  transformers
6
  datasets
 
 
7
  -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
 
4
  torch==2.0.1
5
  transformers
6
  datasets
7
+ tabulate
8
+ gradio[oauth]
9
  -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
text_classification.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+
4
+ import datasets
5
+ import huggingface_hub
6
+ import pandas as pd
7
+ from transformers import pipeline
8
+ import requests
9
+ import os
10
+ from app_env import HF_WRITE_TOKEN
11
+
12
# Module-level logger. The original assigned `logger` twice (first with
# __name__, then with __file__); a single conventional definition is kept.
logger = logging.getLogger(__name__)

# Endpoint queried by check_hf_token_validity to verify a bearer token.
AUTH_CHECK_URL = "https://huggingface.co/api/whoami-v2"
16
+
17
class HuggingFaceInferenceAPIResponse:
    """Carry a human-readable status or error message from an inference call."""

    def __init__(self, message):
        # message: text to show to the user instead of a prediction.
        self.message = message

    def __repr__(self):
        return f"{type(self).__name__}(message={self.message!r})"
20
+
21
+
22
def get_labels_and_features_from_dataset(ds):
    """Split a dataset's columns into label values, feature names and label keys.

    Columns whose name starts with ``"label"`` are treated as label columns;
    all others are features. Only the first label column is inspected.

    Returns:
        ``(labels, features, label_keys)``. When no label column exists, every
        column name is returned in the first two positions and ``label_keys``
        is ``None``. On any failure ``(None, None, None)`` is returned and a
        warning is logged.
    """
    try:
        dataset_features = ds.features
        column_names = list(dataset_features.keys())
        label_keys = [name for name in column_names if name.startswith("label")]
        features = [name for name in column_names if not name.startswith("label")]

        if not label_keys:
            # No labels found: return everything for post processing.
            return list(column_names), list(column_names), None

        label_feature = dataset_features[label_keys[0]]
        if isinstance(label_feature, datasets.ClassLabel):
            labels = label_feature.names
        elif hasattr(label_feature, "feature"):
            # Wrapped feature: read the names from the inner feature.
            labels = label_feature.feature.names
        else:
            # No declared class names: fall back to the distinct column values.
            labels = ds.unique(label_keys[0])
        return labels, features, label_keys
    except Exception as e:
        logging.warning(f"Get Labels/Features Failed for dataset: {e}")
        return None, None, None
47
+
48
def check_model_task(model_id):
    """Return the pipeline tag reported by the Hub for *model_id*.

    Returns ``None`` when the model has no pipeline tag or when the lookup
    fails for any reason (e.g. the model does not exist).
    """
    try:
        # pipeline_tag may itself be None; that value is passed through.
        return huggingface_hub.model_info(model_id).pipeline_tag
    except Exception:
        return None
57
+
58
def get_model_labels(model_id, example_input):
    """Query the inference API with *example_input* and collect predicted labels.

    The token is read from the environment variable whose name is held in
    ``HF_WRITE_TOKEN`` (empty string when unset).

    Returns:
        A list of every ``"label"`` value found in the response, or ``None``
        when the response contains an ``"error"`` entry.
    """
    hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
    payload = {"inputs": example_input, "options": {"use_cache": True}}
    response = hf_inference_api(model_id, hf_token, payload)
    if "error" in response:
        return None
    return extract_from_response(response, "label")
65
+
66
def extract_from_response(data, key):
    """Recursively collect every non-``None`` value stored under *key*.

    Walks nested dicts and lists depth-first. Values of any other type are
    ignored. A dict's own match is recorded before its children are visited.

    Returns:
        A list of the matching values, in traversal order.
    """
    results = []

    if isinstance(data, dict):
        found = data.get(key)
        if found is not None:
            results.append(found)
        for value in data.values():
            results.extend(extract_from_response(value, key))
    elif isinstance(data, list):
        for element in data:
            results.extend(extract_from_response(element, key))

    return results
82
+
83
def hf_inference_api(model_id, hf_token, payload):
    """POST *payload* to the inference endpoint for *model_id*.

    The base URL comes from the ``HF_INFERENCE_ENDPOINT`` environment
    variable, defaulting to ``https://api-inference.huggingface.co``. When the
    API reports that the input is too long, the request is retried once with
    truncation enabled (``max_length`` 512).

    Returns:
        The decoded JSON response, or ``{"error": <raw response body>}`` when
        the body cannot be decoded or handled.
    """
    hf_inference_api_endpoint = os.environ.get(
        "HF_INFERENCE_ENDPOINT", default="https://api-inference.huggingface.co"
    )
    url = f"{hf_inference_api_endpoint}/models/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    response = requests.post(url, headers=headers, json=payload)

    if not hasattr(response, "status_code") or response.status_code != 200:
        logger.warning(f"Request to inference API returns {response}")

    try:
        output = response.json()
        if "error" in output and "Input is too long" in output["error"]:
            # Retry on a copy so the caller's payload dict is not mutated.
            retry_payload = {
                **payload,
                "parameters": {"truncation": True, "max_length": 512},
            }
            response = requests.post(url, headers=headers, json=retry_payload)
            if not hasattr(response, "status_code") or response.status_code != 200:
                logger.warning(f"Request to inference API returns {response}")
            return response.json()
        return output
    except Exception:
        return {"error": response.content}
104
+
105
def preload_hf_inference_api(model_id):
    """Send a fixed test sentence to the inference API for *model_id*.

    The response is discarded. Presumably this warms the model up before real
    requests — confirm against the caller.
    """
    payload = {"inputs": "This is a test", "options": {"use_cache": True}}
    hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
    hf_inference_api(model_id, hf_token, payload)
109
+
110
def check_model_pipeline(model_id):
    """Build a ``transformers`` pipeline for *model_id* using its Hub pipeline tag.

    Returns:
        The pipeline object, or ``None`` when either the Hub lookup or the
        pipeline construction fails.
    """
    try:
        task = huggingface_hub.model_info(model_id).pipeline_tag
    except Exception:
        return None

    try:
        return pipeline(task=task, model=model_id)
    except Exception:
        return None
122
+
123
+
124
def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
    """Find the model label that equals *label* when case is ignored.

    Args:
        id2label_mapping: mapping whose keys are the model's label names.
        label: dataset label to look up.

    Returns:
        ``(model_label, label)`` for the first matching key, or
        ``(None, label)`` when no key matches.
    """
    wanted = label.upper()
    for model_label in id2label_mapping:
        if model_label.upper() == wanted:
            return model_label, label
    return None, label
129
+
130
+
131
def text_classification_map_model_and_dataset_labels(id2label, dataset_features):
    """Match the model's label names against the dataset's ``ClassLabel`` names.

    Only ``ClassLabel`` features with the same number of classes as the model
    are considered. Labels are matched exactly first, then ignoring case.

    Args:
        id2label: the model's id -> label-name mapping.
        dataset_features: mapping of column name -> feature.

    Returns:
        ``(id2label_mapping, dataset_labels)``. ``id2label_mapping`` maps each
        model label to the matched dataset label (``None`` when unmatched).
        ``dataset_labels`` holds the names of the last considered feature, or
        ``None`` when no feature qualified.
    """
    id2label_mapping = {model_label: None for model_label in id2label.values()}
    dataset_labels = None

    for feature in dataset_features.values():
        if not isinstance(feature, datasets.ClassLabel):
            continue
        if len(feature.names) != len(id2label_mapping):
            continue

        dataset_labels = feature.names
        # Try to match labels
        for label in feature.names:
            if label in id2label_mapping:
                model_label = label
            else:
                # Try to find a case-insensitive match
                model_label, label = text_classificaiton_match_label_case_unsensative(
                    id2label_mapping, label
                )
            if model_label is not None:
                id2label_mapping[model_label] = label
            else:
                # Logged rather than printed, as the sibling helpers do.
                logging.warning(f"Label {label} is not found in model labels")

    return id2label_mapping, dataset_labels
156
+
157
+
158
+ """
159
+ params:
160
+ column_mapping: dict
161
+ example: {
162
+ "text": "sentences",
163
+ "label": {
164
+ "label0": "LABEL_0",
165
+ "label1": "LABEL_1"
166
+ }
167
+ }
168
+ ppl: pipeline
169
+ """
170
+
171
+
172
def check_column_mapping_keys_validity(column_mapping, ppl):
    """Check that a user-supplied label mapping agrees with the model's labels.

    Args:
        column_mapping: JSON string. When it has a ``"data"`` entry, that entry
            is a list of ``[user_label, model_label]`` pairs.
        ppl: pipeline object exposing ``model.config.id2label``.

    Returns:
        ``True`` when there is no ``"data"`` entry, or when the set of user
        labels, the set of mapped model labels and the model's own label set
        are all equal. ``False`` otherwise.
    """
    column_mapping = json.loads(column_mapping)
    if "data" not in column_mapping:
        return True

    pairs = column_mapping["data"]
    user_labels = {pair[0] for pair in pairs}
    model_labels = {pair[1] for pair in pairs}
    original_labels = set(ppl.model.config.id2label.values())

    return user_labels == model_labels == original_labels
184
+
185
+
186
+ """
187
+ params:
188
+ column_mapping: dict
189
+ dataset_features: dict
190
+ example: {
191
+ 'text': Value(dtype='string', id=None),
192
+ 'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
193
+ }
194
+ """
195
+
196
+
197
def infer_text_input_column(column_mapping, dataset_features):
    """Ensure ``column_mapping["text"]`` names a column of the dataset.

    When the mapping has no ``"text"`` entry, or the entry is not a dataset
    column, the first feature whose ``dtype`` is ``"string"`` is used instead.

    Args:
        column_mapping: dict, updated in place.
        dataset_features: mapping of column name -> feature.

    Returns:
        ``(column_mapping, feature_map_df)``. ``feature_map_df`` is a one-row
        DataFrame describing the inferred mapping, or ``None`` when nothing was
        inferred (the provided column was valid, or no string column exists).
    """
    needs_inference = True
    feature_map_df = None

    if "text" in column_mapping:
        dataset_text_column = column_mapping["text"]
        if dataset_text_column in dataset_features:
            needs_inference = False
        else:
            logging.warning(f"Provided {dataset_text_column} is not in Dataset columns")

    if needs_inference:
        # getattr: not every feature object is guaranteed to expose `dtype`.
        candidates = [
            name
            for name in dataset_features
            if getattr(dataset_features[name], "dtype", None) == "string"
        ]
        # Only touch candidates[0] once we know a candidate exists; the
        # original indexed it first and raised IndexError on empty lists.
        if candidates:
            logging.debug(f"Candidates are {candidates}")
            column_mapping["text"] = candidates[0]
            feature_map_df = pd.DataFrame(
                {"Dataset Features": [candidates[0]], "Model Input Features": ["text"]}
            )

    return column_mapping, feature_map_df
222
+
223
+
224
+ """
225
+ params:
226
+ column_mapping: dict
227
+ id2label_mapping: dict
228
+ example:
229
+ id2label_mapping: {
230
+ 'negative': 'negative',
231
+ 'neutral': 'neutral',
232
+ 'positive': 'positive'
233
+ }
234
+ """
235
+
236
+
237
def infer_output_label_column(
    column_mapping, id2label_mapping, id2label, dataset_labels
):
    """Fill in the label part of *column_mapping* and build a label table.

    Args:
        column_mapping: dict, updated in place. An optional ``"data"`` entry
            holds ``[user_label, model_label]`` pairs supplied by the user.
        id2label_mapping: model label -> dataset label mapping, updated in
            place from the user-supplied pairs.
        id2label: the model's id -> label-name mapping.
        dataset_labels: label names taken from the dataset.

    Returns:
        ``(column_mapping, id2label_df)``. ``id2label_df`` is ``None`` when no
        user mapping was given and some model label is still unmatched; in that
        case ``column_mapping["label"]`` maps every model id to ``None``.
    """
    has_user_mapping = "data" in column_mapping

    if has_user_mapping:
        if isinstance(column_mapping["data"], list):
            # Use the column mapping passed by the user
            for user_label, model_label in column_mapping["data"]:
                id2label_mapping[model_label] = user_label
    elif None in id2label_mapping.values():
        column_mapping["label"] = {i: None for i in id2label.keys()}
        return column_mapping, None

    if not has_user_mapping:
        # Column mapping should contain original model labels
        column_mapping["label"] = {
            str(i): id2label_mapping[label]
            for i, label in zip(id2label.keys(), dataset_labels)
        }

    id2label_df = pd.DataFrame(
        {
            "Dataset Labels": dataset_labels,
            "Model Prediction Labels": [
                id2label_mapping[label] for label in dataset_labels
            ],
        }
    )

    return column_mapping, id2label_df
267
+
268
+
269
def check_dataset_features_validity(d_id, config, split):
    """Load a dataset split and return it as a DataFrame with its features.

    Returns:
        ``(df, dataset_features)``, or ``(None, None)`` when the loaded object
        has no ``features`` attribute.
    """
    # We assume the dataset is ok here.
    # NOTE(review): trust_remote_code=True executes code shipped with the
    # dataset repository; confirm this is acceptable for user-supplied ids.
    ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
    try:
        dataset_features = ds.features
    except AttributeError:
        # Dataset does not have features, need to provide everything
        return None, None

    # Load dataset as DataFrame
    df = ds.to_pandas()
    return df, dataset_features
281
+
282
def select_the_first_string_column(ds):
    """Return the first column whose value in row 0 is a ``str``.

    Returns ``None`` when the dataset declares no features or when no column
    of the first row holds a string.
    """
    feature_names = list(ds.features.keys())
    if not feature_names:
        return None

    # Fetch the first row once instead of once per feature.
    first_row = ds[0]
    for name in feature_names:
        if isinstance(first_row[name], str):
            return name
    return None
287
+
288
+
289
def get_example_prediction(model_id, dataset_id, dataset_config, dataset_split, hf_token):
    """Get a sample prediction from the model on the first row of the dataset.

    The input is the row's ``"text"`` column when the dataset has one,
    otherwise the first string-valued column.

    Returns:
        ``(prediction_input, prediction_result)`` where ``prediction_result`` is
        one of:
        a ``{label: score}`` dict on success;
        a ``HuggingFaceInferenceAPIResponse`` when the API returned an error;
        ``None`` when anything raised (the error is logged).
    """
    prediction_input = None
    prediction_result = None
    try:
        # Use the first item to test prediction
        # NOTE(review): trust_remote_code=True executes code shipped with the
        # dataset repository; confirm this is acceptable for user-supplied ids.
        ds = datasets.load_dataset(
            dataset_id, dataset_config, split=dataset_split, trust_remote_code=True
        )
        if "text" not in ds.features.keys():
            # Dataset does not have text column
            prediction_input = ds[0][select_the_first_string_column(ds)]
        else:
            prediction_input = ds[0]["text"]

        payload = {"inputs": prediction_input, "options": {"use_cache": True}}
        results = hf_inference_api(model_id, hf_token, payload)

        if isinstance(results, dict) and "error" in results.keys():
            if "estimated_time" in results.keys():
                return prediction_input, HuggingFaceInferenceAPIResponse(
                    f"Estimated time: {int(results['estimated_time'])}s. Please try again later.")
            return prediction_input, HuggingFaceInferenceAPIResponse(
                f"Inference Error: {results['error']}.")

        # Unwrap nested lists until reaching the list of {label, score} dicts.
        while isinstance(results, list):
            if isinstance(results[0], dict):
                break
            results = results[0]
        prediction_result = {
            f'{result["label"]}': result["score"] for result in results
        }
    except Exception as e:
        # inference api prediction failed, show the error message
        logger.error(f"Get example prediction failed {e}")
        return prediction_input, None

    return prediction_input, prediction_result
325
+
326
+
327
def get_sample_prediction(ppl, df, column_mapping, id2label_mapping):
    """Run the pipeline on the first row of *df* and label the scores.

    Args:
        ppl: callable pipeline, invoked as ``ppl({"text": ...}, top_k=None)``.
        df: DataFrame holding the dataset; the row with index label 0 is used.
        column_mapping: dict whose ``"text"`` entry names the input column.
        id2label_mapping: model label -> mapped dataset label.

    Returns:
        ``(prediction_input, prediction_result)``. ``prediction_result`` maps
        ``"<label>(original) - <mapped>(mapped)"`` to the score, or is ``None``
        when the pipeline call or its result format fails.
    """
    prediction_input = None
    try:
        # Use the first item to test prediction
        prediction_input = df.head(1).at[0, column_mapping["text"]]
        results = ppl({"text": prediction_input}, top_k=None)
        # Built only to validate that every result has "label" and "score";
        # a malformed result is handled like a failed prediction.
        _ = {f'{result["label"]}': result["score"] for result in results}
    except Exception:
        # Pipeline prediction failed, need to provide labels
        return prediction_input, None

    # Display results in original label and mapped label
    prediction_result = {
        f'{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result[
            "score"
        ]
        for result in results
    }
    return prediction_input, prediction_result
350
+
351
+
352
def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
    """Complete *column_mapping* for a model/dataset pair and sample a prediction.

    Loads the dataset, infers the text input column and the label mapping, then
    runs the pipeline on the first row.

    Returns:
        ``(column_mapping, prediction_input, prediction_result, id2label_df,
        feature_map_df)``. Positions are ``None`` from the first step that
        could not be completed onwards; all five are ``None`` when the dataset
        has no features or no text column mapping table was produced.
    """
    # load dataset as pd DataFrame
    # get features column from dataset
    df, dataset_features = check_dataset_features_validity(d_id, config, split)
    if dataset_features is None:
        # Dataset has no features: the helpers below would fail on None.
        return None, None, None, None, None

    column_mapping, feature_map_df = infer_text_input_column(
        column_mapping, dataset_features
    )
    if feature_map_df is None:
        # dataset does not have any features
        return None, None, None, None, None

    # Retrieve all labels
    id2label = ppl.model.config.id2label

    # Infer labels
    id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(
        id2label, dataset_features
    )
    column_mapping, id2label_df = infer_output_label_column(
        column_mapping, id2label_mapping, id2label, dataset_labels
    )
    if id2label_df is None:
        # not able to infer the output label column
        return column_mapping, None, None, None, feature_map_df

    # Get a sample prediction
    prediction_input, prediction_result = get_sample_prediction(
        ppl, df, column_mapping, id2label_mapping
    )
    if prediction_result is None:
        # not able to get a sample prediction
        return column_mapping, prediction_input, None, id2label_df, feature_map_df

    return (
        column_mapping,
        prediction_input,
        prediction_result,
        id2label_df,
        feature_map_df,
    )
393
+
394
def strip_model_id_from_url(model_id):
    """Reduce a ``https://huggingface.co/...`` URL to a bare model id.

    Anything that does not start with that prefix is returned unchanged. For
    URLs, at most the first two non-empty path segments are kept, so trailing
    slashes and single-segment ids are handled (the original took the last two
    ``/``-separated parts and returned ``"huggingface.co/<name>"`` or
    ``"<name>/"`` in those cases).
    """
    prefix = "https://huggingface.co/"
    if not model_id.startswith(prefix):
        return model_id

    segments = [part for part in model_id[len(prefix):].split("/") if part]
    return "/".join(segments[:2])
398
+
399
def check_hf_token_validity(hf_token):
    """Return ``True`` when *hf_token* is accepted by the Hub's whoami endpoint.

    Non-string or empty tokens are rejected without a network call. A token is
    also reported invalid when the request fails or times out, since it could
    not be verified.
    """
    if not isinstance(hf_token, str) or hf_token == "":
        return False

    # use huggingface api to check the token
    headers = {"Authorization": f"Bearer {hf_token}"}
    try:
        # Bounded wait so a stalled connection cannot hang the caller.
        response = requests.get(AUTH_CHECK_URL, headers=headers, timeout=10)
    except requests.RequestException:
        return False
    return response.status_code == 200
utils/io_utils.py CHANGED
@@ -1,15 +1,25 @@
1
  import os
2
-
3
  import yaml
4
 
5
  YAML_PATH = "../cicd/configs"
6
  LOG_FILE = "../temp_log"
7
 
 
8
 
9
  class Dumper(yaml.Dumper):
10
  def increase_indent(self, flow=False, *args, **kwargs):
11
  return super().increase_indent(flow=flow, indentless=False)
12
 
 
 
 
 
 
 
 
 
 
13
 
14
  def get_yaml_path(uid):
15
  if not os.path.exists(YAML_PATH):
@@ -72,6 +82,8 @@ def read_column_mapping(uid):
72
  config = yaml.load(f, Loader=yaml.FullLoader)
73
  if config:
74
  column_mapping = config.get("column_mapping", dict())
 
 
75
  return column_mapping
76
 
77
 
 
1
  import os
2
+ import logging
3
  import yaml
4
 
5
  YAML_PATH = "../cicd/configs"
6
  LOG_FILE = "../temp_log"
7
 
8
+ logger = logging.getLogger(__name__)
9
 
10
class Dumper(yaml.Dumper):
    """``yaml.Dumper`` variant that always requests ``indentless=False``."""

    def increase_indent(self, flow=False, *args, **kwargs):
        # Only ``flow`` is forwarded; any other positional or keyword argument
        # (including a caller-supplied ``indentless``) is accepted and ignored.
        return super(Dumper, self).increase_indent(flow=flow, indentless=False)
13
 
14
def get_submitted_yaml_path(uid):
    """Return the path of the submitted config for ``uid``, creating it if needed.

    Steps, all relative to ``YAML_PATH``:
      1. make sure the ``submitted`` directory exists;
      2. if ``{uid}_config.yaml`` is missing, log an error and seed it from
         ``config.yaml`` in the current working directory;
      3. if ``submitted/{uid}_config.yaml`` is missing, copy the per-user
         config into it.

    Copies are best effort: a failed copy is logged and the path is still
    returned, as it was with the previous shell-based copy. Files are copied
    with ``shutil`` instead of ``os.system("cp ...")`` so ``uid`` is never
    interpreted by a shell.
    """
    import shutil  # function scope: only this helper needs it

    submitted_dir = f"{YAML_PATH}/submitted"
    user_config = f"{YAML_PATH}/{uid}_config.yaml"
    submitted_config = f"{submitted_dir}/{uid}_config.yaml"

    # exist_ok avoids the check-then-create race between concurrent requests.
    os.makedirs(submitted_dir, exist_ok=True)

    if not os.path.exists(user_config):
        logger.error(f"config.yaml does not exist for {uid}")
        try:
            shutil.copy("config.yaml", user_config)
        except OSError as e:
            logger.error(f"Failed to copy config.yaml to {user_config}: {e}")

    if not os.path.exists(submitted_config):
        try:
            shutil.copy(user_config, submitted_config)
        except OSError as e:
            logger.error(f"Failed to copy {user_config} to {submitted_config}: {e}")

    return submitted_config
23
 
24
  def get_yaml_path(uid):
25
  if not os.path.exists(YAML_PATH):
 
82
  config = yaml.load(f, Loader=yaml.FullLoader)
83
  if config:
84
  column_mapping = config.get("column_mapping", dict())
85
+ if column_mapping is None:
86
+ column_mapping = {}
87
  return column_mapping
88
 
89
 
utils/run_jobs.py CHANGED
@@ -17,7 +17,7 @@ from app_env import (
17
  HF_SPACE_ID,
18
  HF_WRITE_TOKEN,
19
  )
20
- from utils.io_utils import LOG_FILE, get_yaml_path, write_log_to_user_file
21
  from isolated_env import prepare_venv
22
  from utils.leaderboard import LEADERBOARD
23
 
@@ -50,7 +50,6 @@ def prepare_env_and_get_command(
50
  d_id,
51
  config,
52
  split,
53
- inference,
54
  inference_token,
55
  uid,
56
  label_mapping,
@@ -60,10 +59,6 @@ def prepare_env_and_get_command(
60
  if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
61
  leaderboard_dataset = LEADERBOARD
62
 
63
- inference_type = "hf_pipeline"
64
- if inference and inference_token:
65
- inference_type = "hf_inference_api"
66
-
67
  executable = "giskard_scanner"
68
  try:
69
  # Copy the current requirements (might be changed)
@@ -98,9 +93,9 @@ def prepare_env_and_get_command(
98
  "--label_mapping",
99
  json.dumps(label_mapping),
100
  "--scan_config",
101
- get_yaml_path(uid),
102
  "--inference_type",
103
- inference_type,
104
  "--inference_api_token",
105
  inference_token,
106
  ]
 
17
  HF_SPACE_ID,
18
  HF_WRITE_TOKEN,
19
  )
20
+ from io_utils import LOG_FILE, get_submitted_yaml_path, write_log_to_user_file
21
  from isolated_env import prepare_venv
22
  from utils.leaderboard import LEADERBOARD
23
 
 
50
  d_id,
51
  config,
52
  split,
 
53
  inference_token,
54
  uid,
55
  label_mapping,
 
59
  if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
60
  leaderboard_dataset = LEADERBOARD
61
 
 
 
 
 
62
  executable = "giskard_scanner"
63
  try:
64
  # Copy the current requirements (might be changed)
 
93
  "--label_mapping",
94
  json.dumps(label_mapping),
95
  "--scan_config",
96
+ get_submitted_yaml_path(uid),
97
  "--inference_type",
98
+ "hf_inference_api",
99
  "--inference_api_token",
100
  inference_token,
101
  ]
utils/ui_helpers.py CHANGED
@@ -7,10 +7,15 @@ import datasets
7
  import gradio as gr
8
  import pandas as pd
9
 
10
- import utils.leaderboard as leaderboard
11
- from utils.io_utils import read_column_mapping, write_column_mapping
12
- from utils.run_jobs import save_job_to_pipe
13
- from utils.text_classification import (
 
 
 
 
 
14
  strip_model_id_from_url,
15
  check_model_task,
16
  preload_hf_inference_api,
@@ -26,10 +31,11 @@ from utils.wordings import (
26
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
27
  UNMATCHED_MODEL_DATASET_STYLED_ERROR,
28
  CHECK_LOG_SECTION_RAW,
29
- get_styled_input,
30
  get_dataset_fetch_error_raw,
31
  )
32
  import os
 
33
 
34
  MAX_LABELS = 40
35
  MAX_FEATURES = 20
@@ -47,9 +53,20 @@ def get_related_datasets_from_leaderboard(model_id):
47
  datasets_unique = list(model_records["dataset_id"].unique())
48
 
49
  if len(datasets_unique) == 0:
50
- return gr.update(choices=[], value="")
51
 
52
- return gr.update(choices=datasets_unique, value="")
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def check_dataset(dataset_id):
55
  logger.info(f"Loading {dataset_id}")
@@ -61,9 +78,7 @@ def check_dataset(dataset_id):
61
  gr.update(visible=False),
62
  ""
63
  )
64
- splits = datasets.get_dataset_split_names(
65
- dataset_id, configs[0], trust_remote_code=True
66
- )
67
  return (
68
  gr.update(choices=configs, value=configs[0], visible=True),
69
  gr.update(choices=splits, value=splits[0], visible=True),
@@ -125,7 +140,7 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
125
  ds_labels = list(shared_labels)
126
  if len(ds_labels) > MAX_LABELS:
127
  ds_labels = ds_labels[:MAX_LABELS]
128
- gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
129
 
130
  # sort labels to make sure the order is consistent
131
  # prediction gives the order based on probability
@@ -166,33 +181,67 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
166
 
167
  def precheck_model_ds_enable_example_btn(
168
  model_id, dataset_id, dataset_config, dataset_split
169
- ):
170
- if model_id == "" or dataset_id == "":
171
- return (gr.update(interactive=False), gr.update(visible=False), "")
172
  model_id = strip_model_id_from_url(model_id)
173
  model_task = check_model_task(model_id)
174
  preload_hf_inference_api(model_id)
175
- if model_task is None or model_task != "text-classification":
176
- gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
177
- return (gr.update(interactive=False), gr.update(visible=False), "")
178
-
179
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
180
- return (gr.update(interactive=False), gr.update(visible=False), "")
181
-
 
 
 
 
 
 
 
182
  try:
183
  ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
184
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
185
- ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
188
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
189
- return (gr.update(interactive=False), gr.update(value=df, visible=True), "")
 
 
 
 
 
 
 
190
 
191
- return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
 
 
 
 
 
 
 
192
  except Exception as e:
193
  # Config or split wrong
194
  logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
195
- return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
 
 
 
 
 
 
 
196
 
197
 
198
  def align_columns_and_show_prediction(
@@ -201,8 +250,8 @@ def align_columns_and_show_prediction(
201
  dataset_config,
202
  dataset_split,
203
  uid,
204
- run_inference,
205
- inference_token,
206
  ):
207
  model_id = strip_model_id_from_url(model_id)
208
  model_task = check_model_task(model_id)
@@ -221,7 +270,7 @@ def align_columns_and_show_prediction(
221
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
222
  ]
223
 
224
- hf_token = os.environ.get("HF_WRITE_TOKEN", default="")
225
 
226
  prediction_input, prediction_response = get_example_prediction(
227
  model_id, dataset_id, dataset_config, dataset_split, hf_token
@@ -229,6 +278,7 @@ def align_columns_and_show_prediction(
229
 
230
  if prediction_input is None or prediction_response is None:
231
  return (
 
232
  gr.update(visible=False),
233
  gr.update(visible=False),
234
  gr.update(visible=False, open=False),
@@ -239,6 +289,7 @@ def align_columns_and_show_prediction(
239
 
240
  if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
241
  return (
 
242
  gr.update(visible=False),
243
  gr.update(visible=False),
244
  gr.update(visible=False, open=False),
@@ -250,12 +301,13 @@ def align_columns_and_show_prediction(
250
  model_labels = list(prediction_response.keys())
251
 
252
  ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
253
- ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
254
 
255
  # when dataset does not have labels or features
256
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
257
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
258
  return (
 
259
  gr.update(visible=False),
260
  gr.update(visible=False),
261
  gr.update(visible=False, open=False),
@@ -268,6 +320,7 @@ def align_columns_and_show_prediction(
268
  return (
269
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
270
  gr.update(visible=False),
 
271
  gr.update(visible=False, open=False),
272
  gr.update(interactive=False),
273
  "",
@@ -289,18 +342,20 @@ def align_columns_and_show_prediction(
289
  ):
290
  return (
291
  gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
292
- gr.update(visible=False),
 
293
  gr.update(visible=True, open=True),
294
- gr.update(interactive=(run_inference and inference_token != "")),
295
  "",
296
  *column_mappings,
297
  )
298
 
299
  return (
300
- gr.update(value=get_styled_input(prediction_input), visible=True),
 
301
  gr.update(value=prediction_response, visible=True),
302
  gr.update(visible=True, open=False),
303
- gr.update(interactive=(run_inference and inference_token != "")),
304
  "",
305
  *column_mappings,
306
  )
@@ -308,18 +363,20 @@ def align_columns_and_show_prediction(
308
 
309
  def check_column_mapping_keys_validity(all_mappings):
310
  if all_mappings is None:
 
311
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
312
  return False
313
 
314
  if "labels" not in all_mappings.keys():
315
- gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
316
  return False
317
 
318
  return True
319
 
320
- def enable_run_btn(uid, run_inference, inference_token, model_id, dataset_id, dataset_config, dataset_split):
321
- if not run_inference or inference_token == "":
322
- logger.warn("Inference API is not enabled")
 
323
  return gr.update(interactive=False)
324
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
325
  logger.warn("Model id or dataset id is not selected")
@@ -330,26 +387,27 @@ def enable_run_btn(uid, run_inference, inference_token, model_id, dataset_id, da
330
  logger.warn("Column mapping is not valid")
331
  return gr.update(interactive=False)
332
 
333
- if not check_hf_token_validity(inference_token):
334
- logger.warn("HF token is not valid")
335
- return gr.update(interactive=False)
336
- return gr.update(interactive=True)
337
-
338
- def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
339
  label_mapping = {}
340
  if len(all_mappings["labels"].keys()) != len(ds_labels):
341
- logger.warn("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
342
 
343
  if len(all_mappings["features"].keys()) != len(ds_features):
344
- logger.warn("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
345
 
346
  for i, label in zip(range(len(ds_labels)), ds_labels):
347
  # align the saved labels with dataset labels order
348
  label_mapping.update({str(i): all_mappings["labels"][label]})
349
 
350
  if "features" not in all_mappings.keys():
 
351
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
352
  feature_mapping = all_mappings["features"]
 
 
353
  return label_mapping, feature_mapping
354
 
355
  def show_hf_token_info(token):
@@ -358,16 +416,18 @@ def show_hf_token_info(token):
358
  return gr.update(visible=True)
359
  return gr.update(visible=False)
360
 
361
- def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
 
 
362
  all_mappings = read_column_mapping(uid)
363
  if not check_column_mapping_keys_validity(all_mappings):
364
  return (gr.update(interactive=True), gr.update(visible=False))
365
 
366
  # get ds labels and features again for alignment
367
  ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
368
- ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
369
- label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
370
-
371
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
372
  save_job_to_pipe(
373
  uid,
@@ -376,8 +436,7 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
376
  d_id,
377
  config,
378
  split,
379
- inference,
380
- inference_token,
381
  uid,
382
  label_mapping,
383
  feature_mapping,
@@ -387,8 +446,16 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
387
  )
388
  gr.Info("Your evaluation has been submitted")
389
 
 
 
 
 
390
  return (
391
  gr.update(interactive=False), # Submit button
392
  gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
393
- uuid.uuid4(), # Allocate a new uuid
 
 
 
 
394
  )
 
7
  import gradio as gr
8
  import pandas as pd
9
 
10
+ import leaderboard
11
+ from io_utils import (
12
+ read_column_mapping,
13
+ write_column_mapping,
14
+ read_scanners,
15
+ write_scanners,
16
+ )
17
+ from run_jobs import save_job_to_pipe
18
+ from text_classification import (
19
  strip_model_id_from_url,
20
  check_model_task,
21
  preload_hf_inference_api,
 
31
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
32
  UNMATCHED_MODEL_DATASET_STYLED_ERROR,
33
  CHECK_LOG_SECTION_RAW,
34
+ VALIDATED_MODEL_DATASET_STYLED,
35
  get_dataset_fetch_error_raw,
36
  )
37
  import os
38
+ from app_env import HF_WRITE_TOKEN
39
 
40
  MAX_LABELS = 40
41
  MAX_FEATURES = 20
 
53
  datasets_unique = list(model_records["dataset_id"].unique())
54
 
55
  if len(datasets_unique) == 0:
56
+ return gr.update(choices=[])
57
 
58
+ return gr.update(choices=datasets_unique)
59
+
60
+
61
# Module-level logger named after the module (not its file path), so it takes
# part in the standard dotted logger hierarchy like the other modules do.
logger = logging.getLogger(__name__)
62
+
63
def get_dataset_splits(dataset_id, dataset_config):
    """Return a Gradio update listing the splits of ``dataset_id``/``dataset_config``.

    On success the update makes the target visible, offers every split name
    as a choice and preselects the first one. If the split names cannot be
    fetched (or the list is empty), the failure is logged and an update that
    hides the target is returned instead.
    """
    try:
        splits = datasets.get_dataset_split_names(
            dataset_id, dataset_config, trust_remote_code=True
        )
        return gr.update(choices=splits, value=splits[0], visible=True)
    except Exception as e:
        # Logger.warn is a deprecated alias; use warning() like the rest of the file.
        logger.warning(f"Check your dataset {dataset_id} and config {dataset_config}: {e}")
        return gr.update(visible=False)
70
 
71
  def check_dataset(dataset_id):
72
  logger.info(f"Loading {dataset_id}")
 
78
  gr.update(visible=False),
79
  ""
80
  )
81
+ splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
 
 
82
  return (
83
  gr.update(choices=configs, value=configs[0], visible=True),
84
  gr.update(choices=splits, value=splits[0], visible=True),
 
140
  ds_labels = list(shared_labels)
141
  if len(ds_labels) > MAX_LABELS:
142
  ds_labels = ds_labels[:MAX_LABELS]
143
+ gr.Warning(f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd.")
144
 
145
  # sort labels to make sure the order is consistent
146
  # prediction gives the order based on probability
 
181
 
182
def _precheck_outputs(interactive, preview=None):
    """Build the six Gradio updates returned by the model/dataset pre-check.

    The first update sets ``interactive``; the second shows ``preview`` when
    one is given and hides its target otherwise; the last four always hide
    their targets.
    """
    if preview is None:
        preview_update = gr.update(visible=False)
    else:
        preview_update = gr.update(value=preview, visible=True)
    return (
        gr.update(interactive=interactive),
        preview_update,
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )


def precheck_model_ds_enable_example_btn(
    model_id, dataset_id, dataset_config, dataset_split
):
    """Validate the model/dataset selection and decide whether to enable the next step.

    The model id is normalised, its task is looked up and the inference API
    preload is triggered before anything else. The first returned update is
    interactive only when the dataset loads, the model task is
    ``"text-classification"`` and both dataset labels and features come back
    as lists. Whenever the dataset loads, the first five rows of the chosen
    split are returned as a preview.

    Returns:
        A 6-tuple of ``gr.update`` payloads (see ``_precheck_outputs``).
        NOTE(review): which components these map to is defined by the
        caller's ``outputs=`` list, which is not visible here.
    """
    model_id = strip_model_id_from_url(model_id)
    model_task = check_model_task(model_id)
    preload_hf_inference_api(model_id)

    if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
        return _precheck_outputs(False)

    try:
        ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
        df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
        ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds[dataset_split])

        # A None task also fails this comparison, so no separate None check is needed.
        if model_task != "text-classification":
            gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
            return _precheck_outputs(False, df)

        if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
            gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
            return _precheck_outputs(False, df)

        return _precheck_outputs(True, df)
    except Exception as e:
        # Config or split wrong
        logger.warning(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
        return _precheck_outputs(False)
245
 
246
 
247
  def align_columns_and_show_prediction(
 
250
  dataset_config,
251
  dataset_split,
252
  uid,
253
+ profile: gr.OAuthProfile | None,
254
+ oauth_token: gr.OAuthToken | None,
255
  ):
256
  model_id = strip_model_id_from_url(model_id)
257
  model_task = check_model_task(model_id)
 
270
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
271
  ]
272
 
273
+ hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
274
 
275
  prediction_input, prediction_response = get_example_prediction(
276
  model_id, dataset_id, dataset_config, dataset_split, hf_token
 
278
 
279
  if prediction_input is None or prediction_response is None:
280
  return (
281
+ gr.update(visible=False),
282
  gr.update(visible=False),
283
  gr.update(visible=False),
284
  gr.update(visible=False, open=False),
 
289
 
290
  if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
291
  return (
292
+ gr.update(visible=False),
293
  gr.update(visible=False),
294
  gr.update(visible=False),
295
  gr.update(visible=False, open=False),
 
301
  model_labels = list(prediction_response.keys())
302
 
303
  ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
304
+ ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)
305
 
306
  # when dataset does not have labels or features
307
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
308
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
309
  return (
310
+ gr.update(visible=False),
311
  gr.update(visible=False),
312
  gr.update(visible=False),
313
  gr.update(visible=False, open=False),
 
320
  return (
321
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
322
  gr.update(visible=False),
323
+ gr.update(visible=False),
324
  gr.update(visible=False, open=False),
325
  gr.update(interactive=False),
326
  "",
 
342
  ):
343
  return (
344
  gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
345
+ gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
346
+ gr.update(value=prediction_response, visible=True),
347
  gr.update(visible=True, open=True),
348
+ gr.update(interactive=(profile is not None and oauth_token is not None)),
349
  "",
350
  *column_mappings,
351
  )
352
 
353
  return (
354
+ gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
355
+ gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
356
  gr.update(value=prediction_response, visible=True),
357
  gr.update(visible=True, open=False),
358
+ gr.update(interactive=(profile is not None and oauth_token is not None)),
359
  "",
360
  *column_mappings,
361
  )
 
363
 
364
def check_column_mapping_keys_validity(all_mappings):
    """Tell whether the stored column mapping is usable.

    Returns False (after logging, and for a missing mapping also raising a
    UI warning) when ``all_mappings`` is None or has no ``"labels"`` entry;
    returns True otherwise.
    """
    if all_mappings is None:
        logger.warning("all_mapping is None")
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        return False

    has_labels = "labels" in all_mappings.keys()
    if not has_labels:
        logger.warning(f"Label mapping is not valid, all_mappings: {all_mappings}")
    return has_labels
375
 
376
+ def enable_run_btn(uid, model_id, dataset_id, dataset_config, dataset_split, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
377
+ if profile is None:
378
+ return gr.update(interactive=False)
379
+ if oath_token is None:
380
  return gr.update(interactive=False)
381
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
382
  logger.warn("Model id or dataset id is not selected")
 
387
  logger.warn("Column mapping is not valid")
388
  return gr.update(interactive=False)
389
 
390
def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys=None):
    """Build the label and feature mappings that are handed to the evaluation job.

    Args:
        all_mappings: saved mapping with a ``"labels"`` dict (dataset label ->
            mapped label) and a ``"features"`` dict.
        ds_labels: dataset labels, in dataset order.
        ds_features: dataset feature names; only their count is checked.
        label_keys: optional sequence; when non-empty, its first element is
            stored under ``"label"`` in the feature mapping.

    Returns:
        ``(label_mapping, feature_mapping)``. ``label_mapping`` maps the
        string index of each dataset label to its saved mapping;
        ``feature_mapping`` is a copy of the saved feature mapping, so the
        caller's ``all_mappings`` is never modified.

    Raises:
        KeyError: if ``"labels"`` or ``"features"`` is missing from
            ``all_mappings``, or a dataset label has no saved mapping.
    """
    saved_labels = all_mappings["labels"]

    # Check for the feature mapping before dereferencing it, so the warning is
    # actually shown; the KeyError callers already got is preserved.
    if "features" not in all_mappings:
        logger.warning("features not in all_mappings")
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        raise KeyError("features")
    saved_features = all_mappings["features"]

    if len(saved_labels) != len(ds_labels):
        logger.warning(f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
        \nall_mappings: {all_mappings}\nds_labels: {ds_labels}""")

    if len(saved_features) != len(ds_features):
        logger.warning(f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
        \nall_mappings: {all_mappings}\nds_features: {ds_features}""")

    # Align the saved labels with the dataset's label order.
    label_mapping = {
        str(index): saved_labels[label] for index, label in enumerate(ds_labels)
    }

    feature_mapping = dict(saved_features)
    # label_keys defaults to None, so guard before taking its length.
    if label_keys is not None and len(label_keys) > 0:
        feature_mapping["label"] = label_keys[0]
    return label_mapping, feature_mapping
412
 
413
  def show_hf_token_info(token):
 
416
  return gr.update(visible=True)
417
  return gr.update(visible=False)
418
 
419
+ def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
420
+ print(oath_token.token)
421
+ print(".>>>>>>>>>>>>>>>>>>>>>>")
422
  all_mappings = read_column_mapping(uid)
423
  if not check_column_mapping_keys_validity(all_mappings):
424
  return (gr.update(interactive=True), gr.update(visible=False))
425
 
426
  # get ds labels and features again for alignment
427
  ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
428
+ ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
429
+ label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys)
430
+
431
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
432
  save_job_to_pipe(
433
  uid,
 
436
  d_id,
437
  config,
438
  split,
439
+ oath_token.token,
 
440
  uid,
441
  label_mapping,
442
  feature_mapping,
 
446
  )
447
  gr.Info("Your evaluation has been submitted")
448
 
449
+ new_uid = uuid.uuid4()
450
+ scanners = read_scanners(uid)
451
+ write_scanners(scanners, new_uid)
452
+
453
  return (
454
  gr.update(interactive=False), # Submit button
455
  gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
456
+ new_uid, # Allocate a new uuid
457
+ gr.update(visible=False),
458
+ gr.update(visible=False),
459
+ gr.update(visible=False),
460
+ gr.update(visible=False),
461
  )
utils/wordings.py CHANGED
@@ -2,23 +2,24 @@ INTRODUCTION_MD = """
2
  <h1 style="text-align: center;">
3
  🐢Giskard Evaluator - Text Classification
4
  </h1>
5
- Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
 
6
  """
7
  CONFIRM_MAPPING_DETAILS_MD = """
8
  <h1 style="text-align: center;">
9
  Confirm Pre-processing Details
10
  </h1>
11
- Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model.
12
  """
13
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
14
  <h1 style="text-align: center;">
15
  Confirm Pre-processing Details
16
  </h1>
17
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
18
  """
19
 
20
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
21
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
22
  """
23
 
24
  CHECK_CONFIG_OR_SPLIT_RAW = """
@@ -38,7 +39,7 @@ PREDICTION_SAMPLE_MD = """
38
 
39
  MAPPING_STYLED_ERROR_WARNING = """
40
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
41
- ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
42
  </h3>
43
  """
44
 
@@ -57,7 +58,11 @@ USE_INFERENCE_API_TIP = """
57
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
58
  Hugging Face Inference API
59
  </a>
60
- . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so.
 
 
 
 
61
  """
62
 
63
  HF_TOKEN_INVALID_STYLED= """
@@ -66,10 +71,10 @@ HF_TOKEN_INVALID_STYLED= """
66
  </p>
67
  """
68
 
 
 
 
 
 
69
  def get_dataset_fetch_error_raw(error):
70
  return f"""Sorry you cannot use this dataset because {error}. Contact HF team to support this dataset."""
71
-
72
- def get_styled_input(input):
73
- return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
74
- Your model and dataset have been validated! <br /> Sample input: {input}
75
- </h3>"""
 
2
  <h1 style="text-align: center;">
3
  🐢Giskard Evaluator - Text Classification
4
  </h1>
5
+ Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
6
+ You can also checkout our library documentation <a href="https://docs.giskard.ai/en/latest/getting_started/quickstart/index.html">here</a>.
7
  """
8
  CONFIRM_MAPPING_DETAILS_MD = """
9
  <h1 style="text-align: center;">
10
  Confirm Pre-processing Details
11
  </h1>
12
+ Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model. You can select the output variable's labels from the dropdowns below.
13
  """
14
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
15
  <h1 style="text-align: center;">
16
  Confirm Pre-processing Details
17
  </h1>
18
+ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
19
  """
20
 
21
  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
22
+ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
23
  """
24
 
25
  CHECK_CONFIG_OR_SPLIT_RAW = """
 
39
 
40
  MAPPING_STYLED_ERROR_WARNING = """
41
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
42
+ ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
43
  </h3>
44
  """
45
 
 
58
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
59
  Hugging Face Inference API
60
  </a>
61
+ . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
62
+ """
63
+
64
+ LOG_IN_TIPS = """
65
+ To use the Hugging Face Inference API, you need to log in to your Hugging Face account.
66
  """
67
 
68
  HF_TOKEN_INVALID_STYLED= """
 
71
  </p>
72
  """
73
 
74
+ VALIDATED_MODEL_DATASET_STYLED = """
75
+ <h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
76
+ Your model and dataset have been validated!
77
+ </h3>"""
78
+
79
def get_dataset_fetch_error_raw(error):
    """Return the plain-text message shown when a dataset cannot be fetched."""
    reason = f"{error}"
    return (
        "Sorry you cannot use this dataset because "
        + reason
        + ". Contact HF team to support this dataset."
    )