ZeroCommand committed
Commit 14bc302
1 Parent(s): 088f179

test login button
README.md CHANGED
@@ -7,6 +7,13 @@ sdk: gradio
 sdk_version: 4.7.1
 app_file: app.py
 pinned: false
+
+hf_oauth: true
+# optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
+hf_oauth_expiration_minutes: 480
+# optional, see "Scopes" below. "openid profile" is always included.
+hf_oauth_scopes:
+  - inference-api
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
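With `hf_oauth: true` in the Space metadata (together with the `gradio[oauth]` extra added to requirements.txt below), the app can gate features behind a Hugging Face login. A minimal sketch of that pattern, independent of this repo's code:

```python
import gradio as gr

def hello(profile: gr.OAuthProfile | None) -> str:
    # Gradio fills `profile` from the Space's OAuth session;
    # it stays None until the user clicks the login button.
    if profile is None:
        return "Not logged in."
    return f"Hello, {profile.username}!"

with gr.Blocks() as demo:
    gr.LoginButton()
    greeting = gr.Markdown()
    demo.load(hello, inputs=None, outputs=greeting)

demo.launch()
```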
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
 from app_debug import get_demo as get_demo_debug
 from app_leaderboard import get_demo as get_demo_leaderboard
 from app_text_classification import get_demo as get_demo_text_classification
-from utils.run_jobs import start_process_run_job, stop_thread
+from run_jobs import start_process_run_job, stop_thread
 
 try:
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
app_debug.py CHANGED
@@ -3,12 +3,12 @@ from os.path import isfile, join
 import html
 
 import gradio as gr
-
-import utils.pipe as pipe
-from utils.io_utils import get_logs_file
+import os
+import pipe
+from io_utils import get_logs_file
 
 LOG_PATH = "./tmp"
-CONFIG_PATH = "./cicd/configs/"
+CONFIG_PATH = "./cicd/configs/submitted/"
 MAX_FILES_NUM = 20
 
 
@@ -69,17 +69,19 @@ def get_queue_status():
 
 
 def get_demo():
+    if not os.path.exists(CONFIG_PATH):
+        os.makedirs(CONFIG_PATH)
     with gr.Row():
         gr.HTML(
             value=get_queue_status,
            every=5,
        )
-    with gr.Accordion(label="Log Files", open=False):
-        with gr.Row():
-            gr.Files(value=get_log_files, label="Log Files", every=10)
+    with gr.Accordion(label="Log Files", open=True):
        with gr.Row():
            gr.Textbox(
                value=get_logs_file, every=0.5, lines=10, visible=True, label="Current Log File"
            )
+        with gr.Row():
+            gr.Files(value=get_log_files, label="Log Files", every=10)
    with gr.Accordion(label="Config Files", open=False):
        gr.Files(value=get_config_files, label="Config Files", every=10)
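The new directory guard in get_demo() takes two lines; for reference, os.makedirs can do the check-and-create in one idempotent call. A sketch, not part of this commit:

```python
import os

CONFIG_PATH = "./cicd/configs/submitted/"

# exist_ok=True is a race-free equivalent of the explicit
# os.path.exists() check followed by os.makedirs().
os.makedirs(CONFIG_PATH, exist_ok=True)
```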
app_leaderboard.py CHANGED
@@ -5,10 +5,10 @@ import gradio as gr
 import pandas as pd
 import datetime
 
-from utils.fetch_utils import (check_dataset_and_get_config,
+from fetch_utils import (check_dataset_and_get_config,
                          check_dataset_and_get_split)
 
-import utils.leaderboard as leaderboard
+import leaderboard
 logger = logging.getLogger(__name__)
 global update_time
 update_time = datetime.datetime.fromtimestamp(0)
@@ -88,11 +88,29 @@ def get_demo(leaderboard_tab):
     dataset_ids = get_dataset_ids(records)
 
     column_names = records.columns.tolist()
+    issue_columns = column_names[:11]
+    info_columns = column_names[15:]
     default_columns = ["model_id", "dataset_id", "total_issues", "report_link"]
     default_df = records[default_columns]  # extract columns selected
     types = get_types(default_df)
     display_df = get_display_df(default_df)  # the styled dataframe to display
 
+    with gr.Row():
+        with gr.Column():
+            issue_columns_select = gr.CheckboxGroup(
+                label="Issue Columns",
+                choices=issue_columns,
+                value=[],
+                interactive=True,
+            )
+        with gr.Column():
+            info_columns_select = gr.CheckboxGroup(
+                label="Info Columns",
+                choices=info_columns,
+                value=default_columns,
+                interactive=True,
+            )
+
     with gr.Row():
         task_select = gr.Dropdown(
             label="Task",
@@ -110,42 +128,35 @@ def get_demo(leaderboard_tab):
             interactive=True,
         )
 
-    with gr.Row():
-        columns_select = gr.CheckboxGroup(
-            label="Show columns",
-            choices=column_names,
-            value=default_columns,
-            interactive=True,
-        )
-
     with gr.Row():
         leaderboard_df = gr.DataFrame(display_df, datatype=types, interactive=False)
 
-    def update_leaderboard_records(model_id, dataset_id, columns, task):
+    def update_leaderboard_records(model_id, dataset_id, issue_columns, info_columns, task):
         global update_time
         if datetime.datetime.now() - update_time < datetime.timedelta(minutes=10):
             return gr.update()
         update_time = datetime.datetime.now()
         logger.info("Updating leaderboard records")
         leaderboard.records = get_records_from_dataset_repo(leaderboard.LEADERBOARD)
-        return filter_table(model_id, dataset_id, columns, task)
+        return filter_table(model_id, dataset_id, issue_columns, info_columns, task)
 
     leaderboard_tab.select(
         fn=update_leaderboard_records,
-        inputs=[model_select, dataset_select, columns_select, task_select],
+        inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
         outputs=[leaderboard_df])
 
     @gr.on(
         triggers=[
             model_select.change,
             dataset_select.change,
-            columns_select.change,
+            issue_columns_select.change,
+            info_columns_select.change,
             task_select.change,
         ],
-        inputs=[model_select, dataset_select, columns_select, task_select],
+        inputs=[model_select, dataset_select, issue_columns_select, info_columns_select, task_select],
        outputs=[leaderboard_df],
    )
-    def filter_table(model_id, dataset_id, columns, task):
+    def filter_table(model_id, dataset_id, issue_columns, info_columns, task):
        logger.info("Filtering leaderboard records")
        records = leaderboard.records
        # filter the table based on task
@@ -156,8 +167,9 @@ def get_demo(leaderboard_tab):
         if dataset_id and dataset_id != "Any":
             df = df[(df["dataset_id"] == dataset_id)]
 
-        # filter the table based on the columns
-        df = df[columns]
+        # filter the table based on the columns
+        issue_columns.sort()
+        df = df[info_columns + issue_columns]
         types = get_types(df)
         display_df = get_display_df(df)
         return gr.update(value=display_df, datatype=types, interactive=False)
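The reworked filter simply concatenates the two user selections and indexes the records DataFrame with them; a toy illustration of the same pandas operation (the column names here are made up):

```python
import pandas as pd

# Toy records table standing in for the leaderboard data.
records = pd.DataFrame({
    "model_id": ["m1", "m2"],
    "dataset_id": ["d1", "d2"],
    "total_issues": [3, 0],
    "robustness": [1, 0],
})

info_columns = ["model_id", "dataset_id"]       # user-picked info columns
issue_columns = ["total_issues", "robustness"]  # user-picked issue columns

issue_columns.sort()                        # keep issue columns in a stable order
df = records[info_columns + issue_columns]  # select only the chosen columns
print(df)
```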
app_legacy.py CHANGED
@@ -376,7 +376,7 @@ def get_demo():
     selected = read_scanners("./config.yaml")
     scan_config = selected + ["data_leakage"]
     scanners = gr.CheckboxGroup(
-        choices=scan_config, value=selected, label="Scan Settings", visible=True
+        choices=scan_config, value=selected, visible=True
     )
 
     with gr.Row():
app_text_classification.py CHANGED
@@ -2,12 +2,12 @@ import uuid
 
 import gradio as gr
 
-from utils.io_utils import read_scanners, write_scanners
-from utils.ui_helpers import (
+from io_utils import read_scanners, write_scanners
+from text_classification_ui_helpers import (
     get_related_datasets_from_leaderboard,
     align_columns_and_show_prediction,
+    get_dataset_splits,
     check_dataset,
-    show_hf_token_info,
     precheck_model_ds_enable_example_btn,
     try_submit,
     empty_column_mapping,
@@ -16,12 +16,11 @@ from utils.ui_helpers import (
 )
 
 import logging
-from utils.wordings import (
+from wordings import (
     CONFIRM_MAPPING_DETAILS_MD,
     INTRODUCTION_MD,
-    USE_INFERENCE_API_TIP,
+    LOG_IN_TIPS,
     CHECK_LOG_SECTION_RAW,
-    HF_TOKEN_INVALID_STYLED
 )
 
 MAX_LABELS = 40
@@ -34,6 +33,8 @@ logger = logging.getLogger(__name__)
 def get_demo():
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
+        gr.HTML(LOG_IN_TIPS)
+        gr.LoginButton()
     uid_label = gr.Textbox(
         label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
     )
@@ -58,7 +59,7 @@ def get_demo():
     with gr.Row():
         first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
     with gr.Row():
-        loading_status = gr.HTML(visible=True)
+        loading_dataset_info = gr.HTML(visible=True)
     with gr.Row():
         example_btn = gr.Button(
             "Validate Model & Dataset",
@@ -66,11 +67,13 @@ def get_demo():
             variant="primary",
             interactive=False,
         )
-
     with gr.Row():
-        example_input = gr.HTML(visible=False)
+        loading_validation = gr.HTML(visible=True)
+    with gr.Row():
+        validation_result = gr.HTML(visible=False)
     with gr.Row():
-        example_prediction = gr.Label(label="Model Prediction Sample", visible=False)
+        example_input = gr.Textbox(label="Example Input", visible=False, interactive=False)
+        example_prediction = gr.Label(label="Model Sample Prediction", visible=False)
 
     with gr.Row():
         with gr.Accordion(
@@ -89,27 +92,8 @@ def get_demo():
         for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
             column_mappings.append(gr.Dropdown(visible=False))
 
-    with gr.Accordion(label="Model Wrap Advance Config", open=True):
-        gr.HTML(USE_INFERENCE_API_TIP)
-
-        run_inference = gr.Checkbox(value=True, label="Run with Inference API")
-        inference_token = gr.Textbox(
-            placeholder="hf-xxxxxxxxxxxxxxxxxxxx",
-            value="",
-            label="HF Token for Inference API",
-            visible=True,
-            interactive=True,
-        )
-        inference_token_info = gr.HTML(value=HF_TOKEN_INVALID_STYLED, visible=False)
-
-        inference_token.change(
-            fn=show_hf_token_info,
-            inputs=[inference_token],
-            outputs=[inference_token_info],
-        )
-
-    with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
-        scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
+    with gr.Accordion(label="Scanner Advanced Config (optional)", open=False):
+        scanners = gr.CheckboxGroup(visible=True)
 
     @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
     def get_scanners(uid):
@@ -117,7 +101,16 @@ def get_demo():
         # we remove data_leakage from the default scanners
         # Reason: data_leakage barely raises any issues and takes too many requests
         # when using inference API, causing rate limit error
-        scan_config = selected + ["data_leakage"]
+        scan_config = [
+            "ethical_bias",
+            "text_perturbation",
+            "robustness",
+            "performance",
+            "underconfidence",
+            "overconfidence",
+            "spurious_correlation",
+            "data_leakage",
+        ]
         return gr.update(
             choices=scan_config, value=selected, label="Scan Settings", visible=True
         )
@@ -147,18 +140,20 @@ def get_demo():
         inputs=[model_id_input],
         outputs=[dataset_id_input],
     ).then(
-        fn=check_dataset,
-        inputs=[dataset_id_input],
-        outputs=[dataset_config_input, dataset_split_input, loading_status]
+        fn=check_dataset,
+        inputs=[dataset_id_input],
+        outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
     )
 
     gr.on(
-        triggers=[dataset_id_input.change],
+        triggers=[dataset_id_input.input, dataset_id_input.select],
         fn=check_dataset,
         inputs=[dataset_id_input],
-        outputs=[dataset_config_input, dataset_split_input, loading_status]
+        outputs=[dataset_config_input, dataset_split_input, loading_dataset_info]
     )
 
+    dataset_config_input.change(fn=get_dataset_splits, inputs=[dataset_id_input, dataset_config_input], outputs=[dataset_split_input])
+
     gr.on(
         triggers=[model_id_input.change, dataset_id_input.change, dataset_config_input.change],
         fn=empty_column_mapping,
@@ -187,6 +182,7 @@ def get_demo():
     gr.on(
         triggers=[
             model_id_input.change,
+            model_id_input.input,
             dataset_id_input.change,
             dataset_config_input.change,
             dataset_split_input.change,
@@ -198,7 +194,13 @@ def get_demo():
             dataset_config_input,
             dataset_split_input,
         ],
-        outputs=[example_btn, first_line_ds, loading_status],
+        outputs=[
+            example_btn,
+            first_line_ds,
+            validation_result,
+            example_input,
+            example_prediction,
+            column_mapping_accordion,],
     )
 
     gr.on(
@@ -212,15 +214,14 @@ def get_demo():
             dataset_config_input,
             dataset_split_input,
             uid_label,
-            run_inference,
-            inference_token,
         ],
         outputs=[
+            validation_result,
             example_input,
             example_prediction,
             column_mapping_accordion,
             run_btn,
-            loading_status,
+            loading_validation,
             *column_mappings,
         ],
     )
@@ -235,24 +236,26 @@ def get_demo():
             dataset_id_input,
             dataset_config_input,
             dataset_split_input,
-            run_inference,
-            inference_token,
             uid_label,
         ],
-        outputs=[run_btn, logs, uid_label],
+        outputs=[
+            run_btn,
+            logs,
+            uid_label,
+            validation_result,
+            example_input,
+            example_prediction,
+            column_mapping_accordion,
+        ],
     )
 
     gr.on(
         triggers=[
-            run_inference.input,
-            inference_token.input,
             scanners.input,
         ],
         fn=enable_run_btn,
         inputs=[
             uid_label,
-            run_inference,
-            inference_token,
             model_id_input,
             dataset_id_input,
             dataset_config_input,
@@ -266,8 +269,6 @@ def get_demo():
         fn=enable_run_btn,
         inputs=[
             uid_label,
-            run_inference,
-            inference_token,
             model_id_input,
             dataset_id_input,
             dataset_config_input,
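Most of this file is event wiring through gr.on. The sketch below shows the pattern in isolation: several triggers fanning into one handler whose gr.update return values drive the output components (component names are illustrative, not from this repo):

```python
import gradio as gr

with gr.Blocks() as demo:
    model_box = gr.Textbox(label="Model")
    dataset_box = gr.Textbox(label="Dataset")
    status = gr.HTML(visible=False)

    # One handler reacts to changes on either input and updates one output.
    @gr.on(
        triggers=[model_box.change, dataset_box.change],
        inputs=[model_box, dataset_box],
        outputs=[status],
    )
    def validate(model_id, dataset_id):
        if model_id and dataset_id:
            return gr.update(value="<p>Ready to validate.</p>", visible=True)
        return gr.update(visible=False)

demo.launch()
```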
utils/fetch_utils.py → fetch_utils.py RENAMED
File without changes
utils/io_utils.py → io_utils.py RENAMED
@@ -1,15 +1,25 @@
 import os
-
+import logging
 import yaml
 
 YAML_PATH = "./cicd/configs"
 LOG_FILE = "temp_log"
 
+logger = logging.getLogger(__name__)
 
 class Dumper(yaml.Dumper):
     def increase_indent(self, flow=False, *args, **kwargs):
         return super().increase_indent(flow=flow, indentless=False)
 
+def get_submitted_yaml_path(uid):
+    if not os.path.exists(f"{YAML_PATH}/submitted"):
+        os.makedirs(f"{YAML_PATH}/submitted")
+    if not os.path.exists(f"{YAML_PATH}/{uid}_config.yaml"):
+        logger.error(f"config.yaml does not exist for {uid}")
+        os.system(f"cp config.yaml {YAML_PATH}/{uid}_config.yaml")
+    if not os.path.exists(f"{YAML_PATH}/submitted/{uid}_config.yaml"):
+        os.system(f"cp {YAML_PATH}/{uid}_config.yaml {YAML_PATH}/submitted/{uid}_config.yaml")
+    return f"{YAML_PATH}/submitted/{uid}_config.yaml"
 
 def get_yaml_path(uid):
     if not os.path.exists(YAML_PATH):
@@ -72,6 +82,8 @@ def read_column_mapping(uid):
         config = yaml.load(f, Loader=yaml.FullLoader)
     if config:
         column_mapping = config.get("column_mapping", dict())
+        if column_mapping is None:
+            column_mapping = {}
     return column_mapping
 
 
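get_submitted_yaml_path shells out to cp; the same copy-if-missing logic can be expressed with the standard library. A hedged sketch (assumed equivalent behavior, not part of the commit):

```python
import shutil
from pathlib import Path

YAML_PATH = Path("./cicd/configs")

def get_submitted_yaml_path(uid: str) -> str:
    # Copy the per-run config into configs/submitted/ only when missing,
    # falling back to the default config.yaml like the cp-based helper.
    submitted = YAML_PATH / "submitted"
    submitted.mkdir(parents=True, exist_ok=True)
    src = YAML_PATH / f"{uid}_config.yaml"
    if not src.exists():
        shutil.copyfile("config.yaml", src)
    dst = submitted / f"{uid}_config.yaml"
    if not dst.exists():
        shutil.copyfile(src, dst)
    return str(dst)
```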
isolated_env.py CHANGED
@@ -1,7 +1,7 @@
 import os
 import subprocess
 
-from utils.io_utils import write_log_to_user_file
+from io_utils import write_log_to_user_file
 
 
 def prepare_venv(execution_id, deps):
utils/leaderboard.py → leaderboard.py RENAMED
File without changes
utils/pipe.py → pipe.py RENAMED
File without changes
requirements.txt CHANGED
@@ -4,4 +4,6 @@ hf-transfer
 torch==2.0.1
 transformers
 datasets
+tabulate
+gradio[oauth]
 -e git+https://github.com/Giskard-AI/cicd.git#egg=giskard-cicd
utils/run_jobs.py → run_jobs.py RENAMED
@@ -6,7 +6,7 @@ import threading
 import time
 from pathlib import Path
 
-import utils.pipe as pipe
+import pipe
 from app_env import (
     HF_GSK_HUB_HF_TOKEN,
     HF_GSK_HUB_KEY,
@@ -17,9 +17,9 @@ from app_env import (
     HF_SPACE_ID,
     HF_WRITE_TOKEN,
 )
-from utils.io_utils import LOG_FILE, get_yaml_path, write_log_to_user_file
+from io_utils import LOG_FILE, get_submitted_yaml_path, write_log_to_user_file
 from isolated_env import prepare_venv
-from utils.leaderboard import LEADERBOARD
+from leaderboard import LEADERBOARD
 
 is_running = False
 
@@ -50,7 +50,6 @@ def prepare_env_and_get_command(
     d_id,
     config,
     split,
-    inference,
     inference_token,
     uid,
     label_mapping,
@@ -60,10 +59,6 @@ def prepare_env_and_get_command(
     if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
         leaderboard_dataset = LEADERBOARD
 
-    inference_type = "hf_pipeline"
-    if inference and inference_token:
-        inference_type = "hf_inference_api"
-
     executable = "giskard_scanner"
     try:
         # Copy the current requirements (might be changed)
@@ -98,9 +93,9 @@ def prepare_env_and_get_command(
         "--label_mapping",
         json.dumps(label_mapping),
         "--scan_config",
-        get_yaml_path(uid),
+        get_submitted_yaml_path(uid),
         "--inference_type",
-        inference_type,
+        "hf_inference_api",
         "--inference_api_token",
         inference_token,
     ]
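For context, the argv list assembled here is eventually executed inside the per-job virtualenv with its output captured in the user's log file. A rough sketch of that execution step (only the giskard_scanner entry point and the flags above come from the diff; the wrapper itself is illustrative):

```python
import subprocess

def run_scan(command: list[str], log_path: str) -> int:
    # Stream the scanner's stdout/stderr into the per-job log file
    # and return its exit code; `command` is the argv list built above.
    with open(log_path, "a") as log_file:
        proc = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
        return proc.wait()
```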
utils/text_classification.py → text_classification.py RENAMED
@@ -1,15 +1,19 @@
+import json
 import logging
 
 import datasets
 import huggingface_hub
+import pandas as pd
+from transformers import pipeline
 import requests
 import os
-
 from app_env import HF_WRITE_TOKEN
 
 logger = logging.getLogger(__name__)
 AUTH_CHECK_URL = "https://huggingface.co/api/whoami-v2"
 
+logger = logging.getLogger(__file__)
+
 class HuggingFaceInferenceAPIResponse:
     def __init__(self, message):
         self.message = message
@@ -18,23 +22,28 @@ class HuggingFaceInferenceAPIResponse:
 def get_labels_and_features_from_dataset(ds):
     try:
         dataset_features = ds.features
-        label_keys = [i for i in dataset_features.keys() if i.startswith('label')]
+        label_keys = [i for i in dataset_features.keys() if i.startswith("label")]
+        features = [f for f in dataset_features.keys() if not f.startswith("label")]
+
         if len(label_keys) == 0:  # no labels found
             # return everything for post processing
-            return list(dataset_features.keys()), list(dataset_features.keys())
+            return list(dataset_features.keys()), list(dataset_features.keys()), None
+
+        labels = None
         if not isinstance(dataset_features[label_keys[0]], datasets.ClassLabel):
-            if hasattr(dataset_features[label_keys[0]], 'feature'):
+            if hasattr(dataset_features[label_keys[0]], "feature"):
                 label_feat = dataset_features[label_keys[0]].feature
                 labels = label_feat.names
+            else:
+                labels = ds.unique(label_keys[0])
         else:
             labels = dataset_features[label_keys[0]].names
-        features = [f for f in dataset_features.keys() if not f.startswith("label")]
-        return labels, features
+        return labels, features, label_keys
     except Exception as e:
         logging.warning(
             f"Get Labels/Features Failed for dataset: {e}"
         )
-        return None, None
+        return None, None, None
 
 def check_model_task(model_id):
     # check if model is valid on huggingface
@@ -78,11 +87,19 @@ def hf_inference_api(model_id, hf_token, payload):
     url = f"{hf_inference_api_endpoint}/models/{model_id}"
     headers = {"Authorization": f"Bearer {hf_token}"}
     response = requests.post(url, headers=headers, json=payload)
+
     if not hasattr(response, "status_code") or response.status_code != 200:
         logger.warning(f"Request to inference API returns {response}")
+
     try:
+        output = response.json()
+        if "error" in output and "Input is too long" in output["error"]:
+            payload.update({"parameters": {"truncation": True, "max_length": 512}})
+            response = requests.post(url, headers=headers, json=payload)
+            if not hasattr(response, "status_code") or response.status_code != 200:
+                logger.warning(f"Request to inference API returns {response}")
         return response.json()
-    except Exception:
+    except Exception:
         return {"error": response.content}
 
 def preload_hf_inference_api(model_id):
@@ -90,6 +107,165 @@ def preload_hf_inference_api(model_id):
     hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
     hf_inference_api(model_id, hf_token, payload)
 
+def check_model_pipeline(model_id):
+    try:
+        task = huggingface_hub.model_info(model_id).pipeline_tag
+    except Exception:
+        return None
+
+    try:
+        ppl = pipeline(task=task, model=model_id)
+
+        return ppl
+    except Exception:
+        return None
+
+
+def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
+    for model_label in id2label_mapping.keys():
+        if model_label.upper() == label.upper():
+            return model_label, label
+    return None, label
+
+
+def text_classification_map_model_and_dataset_labels(id2label, dataset_features):
+    id2label_mapping = {id2label[k]: None for k in id2label.keys()}
+    dataset_labels = None
+    for feature in dataset_features.values():
+        if not isinstance(feature, datasets.ClassLabel):
+            continue
+        if len(feature.names) != len(id2label_mapping.keys()):
+            continue
+
+        dataset_labels = feature.names
+        # Try to match labels
+        for label in feature.names:
+            if label in id2label_mapping.keys():
+                model_label = label
+            else:
+                # Try to find case unsensative
+                model_label, label = text_classificaiton_match_label_case_unsensative(
+                    id2label_mapping, label
+                )
+            if model_label is not None:
+                id2label_mapping[model_label] = label
+            else:
+                print(f"Label {label} is not found in model labels")
+
+    return id2label_mapping, dataset_labels
+
+
+"""
+params:
+    column_mapping: dict
+    example: {
+        "text": "sentences",
+        "label": {
+            "label0": "LABEL_0",
+            "label1": "LABEL_1"
+        }
+    }
+ppl: pipeline
+"""
+
+
+def check_column_mapping_keys_validity(column_mapping, ppl):
+    # get the element in all the list elements
+    column_mapping = json.loads(column_mapping)
+    if "data" not in column_mapping.keys():
+        return True
+    user_labels = set([pair[0] for pair in column_mapping["data"]])
+    model_labels = set([pair[1] for pair in column_mapping["data"]])
+
+    id2label = ppl.model.config.id2label
+    original_labels = set(id2label.values())
+
+    return user_labels == model_labels == original_labels
+
+
+"""
+params:
+    column_mapping: dict
+    dataset_features: dict
+    example: {
+        'text': Value(dtype='string', id=None),
+        'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
+    }
+"""
+
+
+def infer_text_input_column(column_mapping, dataset_features):
+    # Check whether we need to infer the text input column
+    infer_text_input_column = True
+    feature_map_df = None
+
+    if "text" in column_mapping.keys():
+        dataset_text_column = column_mapping["text"]
+        if dataset_text_column in dataset_features.keys():
+            infer_text_input_column = False
+        else:
+            logging.warning(f"Provided {dataset_text_column} is not in Dataset columns")
+
+    if infer_text_input_column:
+        # Try to retrieve one
+        candidates = [
+            f for f in dataset_features if dataset_features[f].dtype == "string"
+        ]
+        feature_map_df = pd.DataFrame(
+            {"Dataset Features": [candidates[0]], "Model Input Features": ["text"]}
+        )
+        if len(candidates) > 0:
+            logging.debug(f"Candidates are {candidates}")
+            column_mapping["text"] = candidates[0]
+
+    return column_mapping, feature_map_df
+
+
+"""
+params:
+    column_mapping: dict
+    id2label_mapping: dict
+    example:
+    id2label_mapping: {
+        'negative': 'negative',
+        'neutral': 'neutral',
+        'positive': 'positive'
+    }
+"""
+
+
+def infer_output_label_column(
+    column_mapping, id2label_mapping, id2label, dataset_labels
+):
+    # Check whether we need to infer the output label column
+    if "data" in column_mapping.keys():
+        if isinstance(column_mapping["data"], list):
+            # Use the column mapping passed by user
+            for user_label, model_label in column_mapping["data"]:
+                id2label_mapping[model_label] = user_label
+        elif None in id2label_mapping.values():
+            column_mapping["label"] = {i: None for i in id2label.keys()}
+            return column_mapping, None
+
+    if "data" not in column_mapping.keys():
+        # Column mapping should contain original model labels
+        column_mapping["label"] = {
+            str(i): id2label_mapping[label]
+            for i, label in zip(id2label.keys(), dataset_labels)
+        }
+
+    id2label_df = pd.DataFrame(
+        {
+            "Dataset Labels": dataset_labels,
+            "Model Prediction Labels": [
+                id2label_mapping[label] for label in dataset_labels
+            ],
+        }
+    )
+
+    return column_mapping, id2label_df
+
 def check_dataset_features_validity(d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
@@ -173,9 +349,51 @@ def get_sample_prediction(ppl, df, column_mapping, id2label_mapping):
     return prediction_input, prediction_result
 
 
+def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
+    # load dataset as pd DataFrame
+    # get features column from dataset
+    df, dataset_features = check_dataset_features_validity(d_id, config, split)
+
+    column_mapping, feature_map_df = infer_text_input_column(
+        column_mapping, dataset_features
+    )
+    if feature_map_df is None:
+        # dataset does not have any features
+        return None, None, None, None, None
+
+    # Retrieve all labels
+    id2label = ppl.model.config.id2label
+
+    # Infer labels
+    id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(
+        id2label, dataset_features
+    )
+    column_mapping, id2label_df = infer_output_label_column(
+        column_mapping, id2label_mapping, id2label, dataset_labels
+    )
+    if id2label_df is None:
+        # does not able to infer output label column
+        return column_mapping, None, None, None, feature_map_df
+
+    # Get a sample prediction
+    prediction_input, prediction_result = get_sample_prediction(
+        ppl, df, column_mapping, id2label_mapping
+    )
+    if prediction_result is None:
+        # does not able to get a sample prediction
+        return column_mapping, prediction_input, None, id2label_df, feature_map_df
+
+    return (
+        column_mapping,
+        prediction_input,
+        prediction_result,
+        id2label_df,
+        feature_map_df,
+    )
+
 def strip_model_id_from_url(model_id):
     if model_id.startswith("https://huggingface.co/"):
-        return "/".join(model_id.split("/")[-2])
+        return "/".join(model_id.split("/")[-2:])
     return model_id
 
 def check_hf_token_validity(hf_token):
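get_labels_and_features_from_dataset now returns a 3-tuple, with label_keys reporting which columns were treated as labels (None when nothing label-like exists). A small usage sketch, assuming this module is importable and using a public dataset purely as an example:

```python
import datasets
from text_classification import get_labels_and_features_from_dataset

ds = datasets.load_dataset("imdb", split="test[:10]")
labels, features, label_keys = get_labels_and_features_from_dataset(ds)
print(labels)      # e.g. ["neg", "pos"]
print(features)    # e.g. ["text"]
print(label_keys)  # e.g. ["label"] (columns whose names start with "label")
```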
utils/ui_helpers.py → text_classification_ui_helpers.py RENAMED
@@ -7,10 +7,15 @@ import datasets
7
  import gradio as gr
8
  import pandas as pd
9
 
10
- import utils.leaderboard as leaderboard
11
- from utils.io_utils import read_column_mapping, write_column_mapping
12
- from utils.run_jobs import save_job_to_pipe
13
- from utils.text_classification import (
 
 
 
 
 
14
  strip_model_id_from_url,
15
  check_model_task,
16
  preload_hf_inference_api,
@@ -19,17 +24,18 @@ from utils.text_classification import (
19
  check_hf_token_validity,
20
  HuggingFaceInferenceAPIResponse,
21
  )
22
- from utils.wordings import (
23
  CHECK_CONFIG_OR_SPLIT_RAW,
24
  CONFIRM_MAPPING_DETAILS_FAIL_RAW,
25
  MAPPING_STYLED_ERROR_WARNING,
26
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
27
  UNMATCHED_MODEL_DATASET_STYLED_ERROR,
28
  CHECK_LOG_SECTION_RAW,
29
- get_styled_input,
30
  get_dataset_fetch_error_raw,
31
  )
32
  import os
 
33
 
34
  MAX_LABELS = 40
35
  MAX_FEATURES = 20
@@ -37,9 +43,6 @@ MAX_FEATURES = 20
37
  ds_dict = None
38
  ds_config = None
39
 
40
- logger = logging.getLogger(__file__)
41
-
42
-
43
  def get_related_datasets_from_leaderboard(model_id):
44
  records = leaderboard.records
45
  model_id = strip_model_id_from_url(model_id)
@@ -47,9 +50,20 @@ def get_related_datasets_from_leaderboard(model_id):
47
  datasets_unique = list(model_records["dataset_id"].unique())
48
 
49
  if len(datasets_unique) == 0:
50
- return gr.update(choices=[], value="")
51
 
52
- return gr.update(choices=datasets_unique, value="")
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def check_dataset(dataset_id):
55
  logger.info(f"Loading {dataset_id}")
@@ -61,9 +75,7 @@ def check_dataset(dataset_id):
61
  gr.update(visible=False),
62
  ""
63
  )
64
- splits = datasets.get_dataset_split_names(
65
- dataset_id, configs[0], trust_remote_code=True
66
- )
67
  return (
68
  gr.update(choices=configs, value=configs[0], visible=True),
69
  gr.update(choices=splits, value=splits[0], visible=True),
@@ -116,6 +128,7 @@ def export_mappings(all_mappings, key, subkeys, values):
116
  all_mappings[key][subkey] = values[i % len(values)]
117
  return all_mappings
118
 
 
119
  def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
120
  all_mappings = read_column_mapping(uid)
121
  # For flattened raw datasets with no labels
@@ -125,7 +138,7 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
125
  ds_labels = list(shared_labels)
126
  if len(ds_labels) > MAX_LABELS:
127
  ds_labels = ds_labels[:MAX_LABELS]
128
- gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")
129
 
130
  # sort labels to make sure the order is consistent
131
  # prediction gives the order based on probability
@@ -164,35 +177,70 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
164
 
165
  return lables + features
166
 
 
167
  def precheck_model_ds_enable_example_btn(
168
  model_id, dataset_id, dataset_config, dataset_split
169
- ):
170
- if model_id == "" or dataset_id == "":
171
- return (gr.update(interactive=False), gr.update(visible=False), "")
172
  model_id = strip_model_id_from_url(model_id)
173
  model_task = check_model_task(model_id)
174
  preload_hf_inference_api(model_id)
175
- if model_task is None or model_task != "text-classification":
176
- gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
177
- return (gr.update(interactive=False), gr.update(visible=False), "")
178
-
179
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
180
- return (gr.update(interactive=False), gr.update(visible=False), "")
181
-
 
 
 
 
 
 
 
182
  try:
183
  ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
184
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
185
- ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])
 
 
 
 
 
 
 
 
 
 
 
186
 
187
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
188
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
189
- return (gr.update(interactive=False), gr.update(value=df, visible=True), "")
 
 
 
 
 
 
 
190
 
191
- return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
 
 
 
 
 
 
 
192
  except Exception as e:
193
  # Config or split wrong
194
  logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
195
- return (gr.update(interactive=False), gr.update(value=pd.DataFrame(), visible=False), "")
 
 
 
 
 
 
 
196
 
197
 
198
  def align_columns_and_show_prediction(
@@ -201,8 +249,8 @@ def align_columns_and_show_prediction(
201
  dataset_config,
202
  dataset_split,
203
  uid,
204
- run_inference,
205
- inference_token,
206
  ):
207
  model_id = strip_model_id_from_url(model_id)
208
  model_task = check_model_task(model_id)
@@ -221,7 +269,7 @@ def align_columns_and_show_prediction(
221
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
222
  ]
223
 
224
- hf_token = os.environ.get("HF_WRITE_TOKEN", default="")
225
 
226
  prediction_input, prediction_response = get_example_prediction(
227
  model_id, dataset_id, dataset_config, dataset_split, hf_token
@@ -229,6 +277,7 @@ def align_columns_and_show_prediction(
229
 
230
  if prediction_input is None or prediction_response is None:
231
  return (
 
232
  gr.update(visible=False),
233
  gr.update(visible=False),
234
  gr.update(visible=False, open=False),
@@ -239,6 +288,7 @@ def align_columns_and_show_prediction(
239
 
240
  if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
241
  return (
 
242
  gr.update(visible=False),
243
  gr.update(visible=False),
244
  gr.update(visible=False, open=False),
@@ -250,12 +300,13 @@ def align_columns_and_show_prediction(
250
  model_labels = list(prediction_response.keys())
251
 
252
  ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
253
- ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
254
 
255
  # when dataset does not have labels or features
256
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
257
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
258
  return (
 
259
  gr.update(visible=False),
260
  gr.update(visible=False),
261
  gr.update(visible=False, open=False),
@@ -268,6 +319,7 @@ def align_columns_and_show_prediction(
268
  return (
269
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
270
  gr.update(visible=False),
 
271
  gr.update(visible=False, open=False),
272
  gr.update(interactive=False),
273
  "",
@@ -289,18 +341,20 @@ def align_columns_and_show_prediction(
289
  ):
290
  return (
291
  gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
292
- gr.update(visible=False),
 
293
  gr.update(visible=True, open=True),
294
- gr.update(interactive=(run_inference and inference_token != "")),
295
  "",
296
  *column_mappings,
297
  )
298
 
299
  return (
300
- gr.update(value=get_styled_input(prediction_input), visible=True),
 
301
  gr.update(value=prediction_response, visible=True),
302
  gr.update(visible=True, open=False),
303
- gr.update(interactive=(run_inference and inference_token != "")),
304
  "",
305
  *column_mappings,
306
  )
@@ -308,18 +362,20 @@ def align_columns_and_show_prediction(
308
 
309
  def check_column_mapping_keys_validity(all_mappings):
310
  if all_mappings is None:
 
311
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
312
  return False
313
 
314
  if "labels" not in all_mappings.keys():
315
- gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
316
  return False
317
 
318
  return True
319
 
320
- def enable_run_btn(uid, run_inference, inference_token, model_id, dataset_id, dataset_config, dataset_split):
321
- if not run_inference or inference_token == "":
322
- logger.warn("Inference API is not enabled")
 
323
  return gr.update(interactive=False)
324
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
325
  logger.warn("Model id or dataset id is not selected")
@@ -330,26 +386,27 @@ def enable_run_btn(uid, run_inference, inference_token, model_id, dataset_id, da
330
  logger.warn("Column mapping is not valid")
331
  return gr.update(interactive=False)
332
 
333
- if not check_hf_token_validity(inference_token):
334
- logger.warn("HF token is not valid")
335
- return gr.update(interactive=False)
336
- return gr.update(interactive=True)
337
-
338
- def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features):
339
  label_mapping = {}
340
  if len(all_mappings["labels"].keys()) != len(ds_labels):
341
- logger.warn("Label mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
342
 
343
  if len(all_mappings["features"].keys()) != len(ds_features):
344
- logger.warn("Feature mapping corrupted: " + CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
345
 
346
  for i, label in zip(range(len(ds_labels)), ds_labels):
347
  # align the saved labels with dataset labels order
348
  label_mapping.update({str(i): all_mappings["labels"][label]})
349
 
350
  if "features" not in all_mappings.keys():
 
351
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
 
352
  feature_mapping = all_mappings["features"]
 
 
353
  return label_mapping, feature_mapping
354
 
355
  def show_hf_token_info(token):
@@ -358,16 +415,18 @@ def show_hf_token_info(token):
358
  return gr.update(visible=True)
359
  return gr.update(visible=False)
360
 
361
- def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
 
 
362
  all_mappings = read_column_mapping(uid)
363
  if not check_column_mapping_keys_validity(all_mappings):
364
  return (gr.update(interactive=True), gr.update(visible=False))
365
 
366
  # get ds labels and features again for alignment
367
  ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
368
- ds_labels, ds_features = get_labels_and_features_from_dataset(ds)
369
- label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features)
370
-
371
  eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
372
  save_job_to_pipe(
373
  uid,
@@ -376,8 +435,7 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
376
  d_id,
377
  config,
378
  split,
379
- inference,
380
- inference_token,
381
  uid,
382
  label_mapping,
383
  feature_mapping,
@@ -387,8 +445,16 @@ def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
387
  )
388
  gr.Info("Your evaluation has been submitted")
389
 
 
 
 
 
390
  return (
391
  gr.update(interactive=False), # Submit button
392
  gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
393
- uuid.uuid4(), # Allocate a new uuid
 
 
 
 
394
  )
 
7
  import gradio as gr
8
  import pandas as pd
9
 
10
+ import leaderboard
11
+ from io_utils import (
12
+ read_column_mapping,
13
+ write_column_mapping,
14
+ read_scanners,
15
+ write_scanners,
16
+ )
17
+ from run_jobs import save_job_to_pipe
18
+ from text_classification import (
19
  strip_model_id_from_url,
20
  check_model_task,
21
  preload_hf_inference_api,
 
24
  check_hf_token_validity,
25
  HuggingFaceInferenceAPIResponse,
26
  )
27
+ from wordings import (
28
  CHECK_CONFIG_OR_SPLIT_RAW,
29
  CONFIRM_MAPPING_DETAILS_FAIL_RAW,
30
  MAPPING_STYLED_ERROR_WARNING,
31
  NOT_TEXT_CLASSIFICATION_MODEL_RAW,
32
  UNMATCHED_MODEL_DATASET_STYLED_ERROR,
33
  CHECK_LOG_SECTION_RAW,
34
+ VALIDATED_MODEL_DATASET_STYLED,
35
  get_dataset_fetch_error_raw,
36
  )
37
  import os
38
+ from app_env import HF_WRITE_TOKEN
39
 
40
  MAX_LABELS = 40
41
  MAX_FEATURES = 20
 
43
  ds_dict = None
44
  ds_config = None
45
 
 
 
 
46
  def get_related_datasets_from_leaderboard(model_id):
47
  records = leaderboard.records
48
  model_id = strip_model_id_from_url(model_id)
 
50
  datasets_unique = list(model_records["dataset_id"].unique())
51
 
52
  if len(datasets_unique) == 0:
53
+ return gr.update(choices=[])
54
 
55
+ return gr.update(choices=datasets_unique)
56
+
57
+
58
+ logger = logging.getLogger(__file__)
59
+
60
+ def get_dataset_splits(dataset_id, dataset_config):
61
+ try:
62
+ splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
63
+ return gr.update(choices=splits, value=splits[0], visible=True)
64
+ except Exception as e:
65
+ logger.warn(f"Check your dataset {dataset_id} and config {dataset_config}: {e}")
66
+ return gr.update(visible=False)
67
 
68
  def check_dataset(dataset_id):
69
  logger.info(f"Loading {dataset_id}")
 
75
  gr.update(visible=False),
76
  ""
77
  )
78
+ splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
 
 
79
  return (
80
  gr.update(choices=configs, value=configs[0], visible=True),
81
  gr.update(choices=splits, value=splits[0], visible=True),
 
128
  all_mappings[key][subkey] = values[i % len(values)]
129
  return all_mappings
130
 
131
+
132
  def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
133
  all_mappings = read_column_mapping(uid)
134
  # For flattened raw datasets with no labels
 
138
  ds_labels = list(shared_labels)
139
  if len(ds_labels) > MAX_LABELS:
140
  ds_labels = ds_labels[:MAX_LABELS]
141
+ gr.Warning(f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd.")
142
 
143
  # sort labels to make sure the order is consistent
144
  # prediction gives the order based on probability
 
177
 
178
  return lables + features
179
 
180
+
181
  def precheck_model_ds_enable_example_btn(
182
  model_id, dataset_id, dataset_config, dataset_split
183
+ ):
 
 
184
  model_id = strip_model_id_from_url(model_id)
185
  model_task = check_model_task(model_id)
186
  preload_hf_inference_api(model_id)
187
+
 
 
 
188
  if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
189
+ return (
190
+ gr.update(interactive=False),
191
+ gr.update(visible=False),
192
+ gr.update(visible=False),
193
+ gr.update(visible=False),
194
+ gr.update(visible=False),
195
+ gr.update(visible=False),
196
+ )
197
+
198
  try:
199
  ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
200
  df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
201
+ ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds[dataset_split])
202
+
203
+ if model_task is None or model_task != "text-classification":
204
+ gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
205
+ return (
206
+ gr.update(interactive=False),
207
+ gr.update(value=df, visible=True),
208
+ gr.update(visible=False),
209
+ gr.update(visible=False),
210
+ gr.update(visible=False),
211
+ gr.update(visible=False),
212
+ )
213
 
214
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
215
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
216
+ return (
217
+ gr.update(interactive=False),
218
+ gr.update(value=df, visible=True),
219
+ gr.update(visible=False),
220
+ gr.update(visible=False),
221
+ gr.update(visible=False),
222
+ gr.update(visible=False),
223
+ )
224
 
225
+ return (
226
+ gr.update(interactive=True),
227
+ gr.update(value=df, visible=True),
228
+ gr.update(visible=False),
229
+ gr.update(visible=False),
230
+ gr.update(visible=False),
231
+ gr.update(visible=False),
232
+ )
233
  except Exception as e:
234
  # Config or split wrong
235
  logger.warn(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
236
+ return (
237
+ gr.update(interactive=False),
238
+ gr.update(visible=False),
239
+ gr.update(visible=False),
240
+ gr.update(visible=False),
241
+ gr.update(visible=False),
242
+ gr.update(visible=False),
243
+ )
244
 
245
 
246
  def align_columns_and_show_prediction(
 
249
  dataset_config,
250
  dataset_split,
251
  uid,
252
+ profile: gr.OAuthProfile | None,
253
+ oauth_token: gr.OAuthToken | None,
254
  ):
255
  model_id = strip_model_id_from_url(model_id)
256
  model_task = check_model_task(model_id)
 
269
  gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
270
  ]
271
 
272
+ hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
273
 
274
  prediction_input, prediction_response = get_example_prediction(
275
  model_id, dataset_id, dataset_config, dataset_split, hf_token
 
277
 
278
  if prediction_input is None or prediction_response is None:
279
  return (
280
+ gr.update(visible=False),
281
  gr.update(visible=False),
282
  gr.update(visible=False),
283
  gr.update(visible=False, open=False),
 
288
 
289
  if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
290
  return (
291
+ gr.update(visible=False),
292
  gr.update(visible=False),
293
  gr.update(visible=False),
294
  gr.update(visible=False, open=False),
 
300
  model_labels = list(prediction_response.keys())
301
 
302
  ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
303
+ ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)
304
 
305
  # when dataset does not have labels or features
306
  if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
307
  gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
308
  return (
309
+ gr.update(visible=False),
310
  gr.update(visible=False),
311
  gr.update(visible=False),
312
  gr.update(visible=False, open=False),
 
319
  return (
320
  gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
321
  gr.update(visible=False),
322
+ gr.update(visible=False),
323
  gr.update(visible=False, open=False),
324
  gr.update(interactive=False),
325
  "",
 
341
  ):
342
  return (
343
  gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
344
+ gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
345
+ gr.update(value=prediction_response, visible=True),
346
  gr.update(visible=True, open=True),
347
+ gr.update(interactive=(profile is not None and oauth_token is not None)),
348
  "",
349
  *column_mappings,
350
  )
351
 
352
  return (
353
+ gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
354
+ gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
355
  gr.update(value=prediction_response, visible=True),
356
  gr.update(visible=True, open=False),
357
+ gr.update(interactive=(profile is not None and oauth_token is not None)),
358
  "",
359
  *column_mappings,
360
  )
 
362
 
363
  def check_column_mapping_keys_validity(all_mappings):
364
  if all_mappings is None:
365
+ logger.warning("all_mapping is None")
366
  gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
367
  return False
368
 
369
  if "labels" not in all_mappings.keys():
370
+ logger.warning(f"Label mapping is not valid, all_mappings: {all_mappings}")
371
  return False
372
 
373
  return True
374
 
375
+ def enable_run_btn(uid, model_id, dataset_id, dataset_config, dataset_split, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
376
+ if profile is None:
377
+ return gr.update(interactive=False)
378
+ if oath_token is None:
379
  return gr.update(interactive=False)
380
  if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
381
  logger.warn("Model id or dataset id is not selected")
 
386
  logger.warn("Column mapping is not valid")
387
  return gr.update(interactive=False)

+ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys=None):
      label_mapping = {}
      if len(all_mappings["labels"].keys()) != len(ds_labels):
+         logger.warning(f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
+ \nall_mappings: {all_mappings}\nds_labels: {ds_labels}""")

      if len(all_mappings["features"].keys()) != len(ds_features):
+         logger.warning(f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
+ \nall_mappings: {all_mappings}\nds_features: {ds_features}""")

      for i, label in zip(range(len(ds_labels)), ds_labels):
          # align the saved labels with dataset labels order
          label_mapping.update({str(i): all_mappings["labels"][label]})

      if "features" not in all_mappings.keys():
+         logger.warning("features not in all_mappings")
          gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+
      feature_mapping = all_mappings["features"]
+     if label_keys:
+         feature_mapping.update({"label": label_keys[0]})
      return label_mapping, feature_mapping
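For a concrete sense of what the refactored helper returns, here is a toy run with invented values (the mapping direction mirrors how `label_mapping` and `feature_mapping` are later passed to the job pipe):

```python
# assumes construct_label_and_feature_mapping from the diff above is in scope
all_mappings = {
    "labels": {"negative": "NEGATIVE", "positive": "POSITIVE"},
    "features": {"text": "sentence"},
}
ds_labels = ["negative", "positive"]  # order comes from the dataset
ds_features = ["sentence"]
label_keys = ["label"]                # the dataset's label column

label_mapping, feature_mapping = construct_label_and_feature_mapping(
    all_mappings, ds_labels, ds_features, label_keys
)
assert label_mapping == {"0": "NEGATIVE", "1": "POSITIVE"}   # index-aligned
assert feature_mapping == {"text": "sentence", "label": "label"}
```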

  def show_hf_token_info(token):
 
          return gr.update(visible=True)
      return gr.update(visible=False)

+ def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None):
      all_mappings = read_column_mapping(uid)
      if not check_column_mapping_keys_validity(all_mappings):
          return (gr.update(interactive=True), gr.update(visible=False))

      # get ds labels and features again for alignment
      ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
+     ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
+     label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys)
+
      eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
      save_job_to_pipe(
          uid,
 
          d_id,
          config,
          split,
+         oauth_token.token,
          uid,
          label_mapping,
          feature_mapping,
 
      )
      gr.Info("Your evaluation has been submitted")

+     new_uid = uuid.uuid4()
+     scanners = read_scanners(uid)
+     write_scanners(scanners, new_uid)
+
      return (
          gr.update(interactive=False),  # Submit button
          gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
+         new_uid,  # Allocate a new uuid
+         gr.update(visible=False),
+         gr.update(visible=False),
+         gr.update(visible=False),
+         gr.update(visible=False),
      )
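`try_submit` receives the OAuth profile and token the same way, forwards `oauth_token.token` into the job, and hands back a fresh uuid plus updates that hide the prediction preview. Continuing the hypothetical Blocks sketch above (all component names here are assumptions), the click wiring has to line up with that return tuple:

```python
# assumes try_submit from the diff above is in scope, plus the hypothetical
# components run_btn, log_textbox, uid, and the four panels hidden on submit
run_btn.click(
    try_submit,
    inputs=[model_id, dataset_id, dataset_config, dataset_split, uid],
    outputs=[
        run_btn,                  # disabled while the job runs
        log_textbox,              # shows the job id and log hint
        uid,                      # state receives new_uid for the next run
        prediction_input_box,     # hidden after submit
        prediction_response_box,  # hidden after submit
        mapping_accordion,        # hidden after submit
        validation_banner,        # hidden after submit
    ],
)
```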
utils.py ADDED
@@ -0,0 +1,29 @@
+ import sys
+
+ import yaml
+
+
+ # read scanners from yaml file
+ # return a list of scanners
+ def read_scanners(path):
+     scanners = []
+     with open(path, "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         scanners = config.get("detectors", None)
+     return scanners
+
+
+ # convert a list of scanners to yaml file
+ def write_scanners(scanners):
+     with open("./scan_config.yaml", "w") as f:
+         # save scanners to detectors in yaml
+         yaml.dump({"detectors": scanners}, f)
+
+
+ # convert column mapping dataframe to json
+ def convert_column_mapping_to_json(df, label=""):
+     column_mapping = {}
+     column_mapping[label] = []
+     for _, row in df.iterrows():
+         column_mapping[label].append(row.tolist())
+     return column_mapping
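Note that `try_submit` above calls `read_scanners(uid)` and `write_scanners(scanners, new_uid)`, while the file as added here takes a plain path and writes a fixed `./scan_config.yaml`; the uid-based calls suggest a later revision of these helpers. A round-trip sketch with the signatures as defined here (detector names are example values):

```python
import yaml
from utils import read_scanners, write_scanners

# seed a config file for the example
with open("./scan_config.yaml", "w") as f:
    yaml.dump({"detectors": ["robustness", "performance"]}, f)

scanners = read_scanners("./scan_config.yaml")  # -> ["robustness", "performance"]
write_scanners(scanners)                        # dumps them back to ./scan_config.yaml
```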
utils/wordings.py → wordings.py RENAMED
@@ -2,27 +2,28 @@ INTRODUCTION_MD = """
  <h1 style="text-align: center;">
  🐢Giskard Evaluator - Text Classification
  </h1>
- Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
+ Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
+ You can also checkout our library documentation <a href="https://docs.giskard.ai/en/latest/getting_started/quickstart/index.html">here</a>.
  """
  CONFIRM_MAPPING_DETAILS_MD = """
  <h1 style="text-align: center;">
  Confirm Pre-processing Details
  </h1>
- Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model.
+ Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model. You can select the output variable's labels from the dropdowns below.
  """
  CONFIRM_MAPPING_DETAILS_FAIL_MD = """
  <h1 style="text-align: center;">
  Confirm Pre-processing Details
  </h1>
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
+ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
  """

  CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
- We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
+ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
  """

  CHECK_CONFIG_OR_SPLIT_RAW = """
- We're unanle to extract labels or features from your dataset. Please check your dataset config or split selection.
+ Please check your dataset config or split.
  """

  CHECK_LOG_SECTION_RAW = """
@@ -38,7 +39,7 @@ PREDICTION_SAMPLE_MD = """

  MAPPING_STYLED_ERROR_WARNING = """
  <h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
- ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. <b>Please manually check the mapping below.</b>
+ ⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
  </h3>
  """

@@ -57,7 +58,11 @@ USE_INFERENCE_API_TIP = """
  <a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
  Hugging Face Inference API
  </a>
- . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so.
+ . Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
+ """
+
+ LOG_IN_TIPS = """
+ To use the Hugging Face Inference API, you need to log in to your Hugging Face account.
  """

  HF_TOKEN_INVALID_STYLED= """
@@ -66,10 +71,10 @@ HF_TOKEN_INVALID_STYLED= """
  </p>
  """

+ VALIDATED_MODEL_DATASET_STYLED = """
+ <h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
+ Your model and dataset have been validated!
+ </h3>"""
+
  def get_dataset_fetch_error_raw(error):
      return f"""Sorry you cannot use this dataset because {error}. Contact HF team to support this dataset."""
-
- def get_styled_input(input):
-     return f"""<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
-     Your model and dataset have been validated! <br /> Sample input: {input}
-     </h3>"""
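These wording strings are raw HTML snippets; the `gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True)` calls earlier in the diff pour them into an HTML component. A minimal sketch of that pattern (the component and handler here are invented for illustration):

```python
import gradio as gr
from wordings import VALIDATED_MODEL_DATASET_STYLED

with gr.Blocks() as demo:
    banner = gr.HTML(visible=False)
    check_btn = gr.Button("Validate")
    # on success the handler reveals the green "validated" banner
    check_btn.click(
        lambda: gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
        outputs=[banner],
    )

demo.launch()
```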