giskard-evaluator

Running

App Files Community

200

inoki-giskard

ZeroCommand committed on Jan 4

Commit

8f809e2

•

1 Parent(s): be473e6

GSK-2434 Add component to show logs (#17)

Browse files

- update log area (f04482da801bfe6bf8c4f1fa92fab5677aa14158)
- fix pipe io bug (e631fcc66670d557b9e8f885528a392cd01188fd)
- clean code (748c85beba7853d456e65a347bb5eb03bafbe7cc)
- remove pipe| (b0a573f944ae019c75a4cde2876fa17b10b41add)
- fix run job logs (64f50dd45d73b4615e83790e7bf6c1e9c28a346f)
- add every for logs textbox (aaa034c2aae0d0d16ce112cced026c3d0fe04104)
- refresh log files not working (89d01cfcb69c644aa39afcbbcbb88d27c337ef9d)
- show log with textbox value (f227810f647cd8c0c849bd0338f289422971772f)
- fix log refresh (7f4008b268536b4f60fc0b9b57ad9fe5331b786a)

Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>

Files changed (9) hide show

app.py +22 -11
app_leaderboard.py +3 -4
app_text_classification.py +46 -181
fetch_utils.py +0 -1
io_utils.py +59 -2
run_jobs.py +29 -0
text_classification_ui_helpers.py +181 -0
tmp/pipe +0 -0
wordings.py +1 -1

app.py CHANGED Viewed

@@ -1,17 +1,28 @@
-# Start apps
-# from pathlib import Path
 import gradio as gr
 from app_text_classification import get_demo as get_demo_text_classification
 from app_leaderboard import get_demo as get_demo_leaderboard
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
-    with gr.Tab("Text Classification"):
-        get_demo_text_classification()
-    with gr.Tab("Leaderboard"):
-        get_demo_leaderboard()
-demo.queue(max_size=100)
-demo.launch(share=False)

 import gradio as gr
+import atexit
 from app_text_classification import get_demo as get_demo_text_classification
 from app_leaderboard import get_demo as get_demo_leaderboard
+from run_jobs import start_process_run_job, stop_thread
+import threading
+if threading.current_thread() is not threading.main_thread():
+    t = threading.current_thread()
+try:
+    with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
+        with gr.Tab("Text Classification"):
+            get_demo_text_classification(demo)
+        with gr.Tab("Leaderboard"):
+            get_demo_leaderboard()
+    start_process_run_job()
+    demo.queue(max_size=100)
+    demo.launch(share=False)
+    atexit.register(stop_thread)
+except Exception:
+    print("stop background thread")
+    stop_thread()

app_leaderboard.py CHANGED Viewed

@@ -32,7 +32,7 @@ def get_dataset_ids(ds):
     return dataset_ids
 def get_types(ds):
-    # set all types for each column
     types = [str(t) for t in ds.dtypes.to_list()]
     types = [t.replace('object', 'markdown') for t in types]
     types = [t.replace('float64', 'number') for t in types]
@@ -61,10 +61,9 @@ def get_demo():
     column_names = records.columns.tolist()
     default_columns = ['model_id', 'dataset_id', 'total_issues', 'report_link']
-    # set the default columns to show
-    default_df = records[default_columns]
     types = get_types(default_df)
-    display_df = get_display_df(default_df)
     with gr.Row():
         task_select = gr.Dropdown(label='Task', choices=['text_classification', 'tabular'], value='text_classification', interactive=True)

     return dataset_ids
 def get_types(ds):
+    # set types for each column
     types = [str(t) for t in ds.dtypes.to_list()]
     types = [t.replace('object', 'markdown') for t in types]
     types = [t.replace('float64', 'number') for t in types]
     column_names = records.columns.tolist()
     default_columns = ['model_id', 'dataset_id', 'total_issues', 'report_link']
+    default_df = records[default_columns] # extract columns selected
     types = get_types(default_df)
+    display_df = get_display_df(default_df) # the styled dataframe to display
     with gr.Row():
         task_select = gr.Dropdown(label='Task', choices=['text_classification', 'tabular'], value='text_classification', interactive=True)

app_text_classification.py CHANGED Viewed

@@ -1,22 +1,8 @@
 import gradio as gr
-import datasets
-import os
-import time
-import subprocess
-import logging
-import collections
-import json
-from transformers.pipelines import TextClassificationPipeline
-from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
-from io_utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type
-from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
-HF_REPO_ID = 'HF_REPO_ID'
-HF_SPACE_ID = 'SPACE_ID'
-HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
 MAX_LABELS = 20
 MAX_FEATURES = 20
@@ -25,76 +11,7 @@ EXAMPLE_MODEL_ID = 'cardiffnlp/twitter-roberta-base-sentiment-latest'
 EXAMPLE_DATA_ID = 'tweet_eval'
 CONFIG_PATH='./config.yaml'
-def try_submit(m_id, d_id, config, split, local):
-    all_mappings = read_column_mapping(CONFIG_PATH)
-    if "labels" not in all_mappings.keys():
-        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-        return gr.update(interactive=True)
-    label_mapping = all_mappings["labels"]
-    if "features" not in all_mappings.keys():
-        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-        return gr.update(interactive=True)
-    feature_mapping = all_mappings["features"]
-    # TODO: Set column mapping for some dataset such as `amazon_polarity`
-    if local:
-        command = [
-            "python",
-            "cli.py",
-            "--loader", "huggingface",
-            "--model", m_id,
-            "--dataset", d_id,
-            "--dataset_config", config,
-            "--dataset_split", split,
-            "--hf_token", os.environ.get(HF_WRITE_TOKEN),
-            "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
-            "--output_format", "markdown",
-            "--output_portal", "huggingface",
-            "--feature_mapping", json.dumps(feature_mapping),
-            "--label_mapping", json.dumps(label_mapping),
-            "--scan_config", "../config.yaml",
-        ]
-        eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
-        start = time.time()
-        logging.info(f"Start local evaluation on {eval_str}")
-        evaluator = subprocess.Popen(
-            command,
-            cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
-            stderr=subprocess.STDOUT,
-        )
-        result = evaluator.wait()
-        logging.info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
-        gr.Info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
-    else:
-        gr.Info("TODO: Submit task to an endpoint")
-    return gr.update(interactive=True)  # Submit button
-def check_dataset_and_get_config(dataset_id):
-    try:
-        configs = datasets.get_dataset_config_names(dataset_id)
-        return gr.Dropdown(configs, value=configs[0], visible=True)
-    except Exception:
-        # Dataset may not exist
-        pass
-def check_dataset_and_get_split(dataset_id, dataset_config):
-    try:
-        splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
-        return gr.Dropdown(splits, value=splits[0], visible=True)
-    except Exception:
-        # Dataset may not exist
-        # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
-        pass
-def get_demo():
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
     with gr.Row():
@@ -137,6 +54,9 @@ def get_demo():
     with gr.Accordion(label='Scanner Advance Config (optional)', open=False):
         selected = read_scanners('./config.yaml')
         scan_config = selected + ['data_leakage']
         scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
@@ -147,102 +67,21 @@ def get_demo():
             interactive=True,
             size="lg",
         )
-    @gr.on(triggers=[label.change for label in column_mappings],
-           inputs=[dataset_id_input, dataset_config_input, dataset_split_input, *column_mappings])
-    def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
-        ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
-        if labels is None:
-            return
-        labels = [*labels]
-        all_mappings = read_column_mapping(CONFIG_PATH)
-        if "labels" not in all_mappings.keys():
-            all_mappings["labels"] = dict()
-        for i, label in enumerate(labels[:MAX_LABELS]):
-            if label:
-                all_mappings["labels"][label] = ds_labels[i]
-        if "features" not in all_mappings.keys():
-            all_mappings["features"] = dict()
-        for i, feat in enumerate(labels[MAX_LABELS:(MAX_LABELS + MAX_FEATURES)]):
-            if feat:
-                all_mappings["features"][feat] = ds_features[i]
-        write_column_mapping(all_mappings)
-    def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
-        model_labels = list(model_id2label.values())
-        lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
-        lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
-        # TODO: Substitute 'text' with more features for zero-shot
-        features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in ['text']]
-        features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
-        return lables + features
-    @gr.on(triggers=[model_id_input.change, dataset_config_input.change])
-    def clear_column_mapping_config():
-        write_column_mapping(None)
-    @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
         outputs=[example_input, example_prediction, column_mapping_accordion, *column_mappings])
-    def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
-        ppl = check_model(model_id)
-        if ppl is None or not isinstance(ppl, TextClassificationPipeline):
-            gr.Warning("Please check your model.")
-            return (
-                gr.update(visible=False),
-                gr.update(visible=False),
-                *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
-            )
-        dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
-        if ppl is None: # pipeline not found
-            gr.Warning("Model not found")
-            return (
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False, open=False),
-                *dropdown_placement
-            )
-        model_id2label = ppl.model.config.id2label
-        ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
-        # when dataset does not have labels or features
-        if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
-            gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-            return (
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False, open=False),
-                *dropdown_placement
-            )
-        column_mappings = list_labels_and_features_from_dataset(
-            ds_labels,
-            ds_features,
-            model_id2label,
-        )
-        # when labels or features are not aligned
-        # show manually column mapping
-        if collections.Counter(model_id2label.items()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
-            gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
-            return (
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=True, open=True),
-                *column_mappings
-            )
-        prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
-        return (
-            gr.update(value=prediction_input, visible=True),
-            gr.update(value=prediction_output, visible=True),
-            gr.update(visible=True, open=False),
-            *column_mappings
-        )
     dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
@@ -266,5 +105,31 @@ def get_demo():
             run_btn.click,
             ],
         fn=try_submit,
-        inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, run_local],
         outputs=[run_btn])

 import gradio as gr
+import uuid
+from io_utils import read_scanners, write_scanners, read_inference_type, write_inference_type, get_logs_file
+from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
+from text_classification_ui_helpers import try_submit, check_dataset_and_get_config, check_dataset_and_get_split, check_model_and_show_prediction, write_column_mapping_to_config, get_logs_file
 MAX_LABELS = 20
 MAX_FEATURES = 20
 EXAMPLE_DATA_ID = 'tweet_eval'
 CONFIG_PATH='./config.yaml'
+def get_demo(demo):
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
     with gr.Row():
     with gr.Accordion(label='Scanner Advance Config (optional)', open=False):
         selected = read_scanners('./config.yaml')
+        # currently we remove data_leakage from the default scanners
+        # Reason: data_leakage barely raises any issues and takes too many requests
+        # when using inference API, causing rate limit error
         scan_config = selected + ['data_leakage']
         scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
             interactive=True,
             size="lg",
         )
+    with gr.Row():
+        uid = uuid.uuid4()
+        uid_label = gr.Textbox(label="Evaluation ID:", value=uid, visible=False, interactive=False)
+        logs = gr.Textbox(label="Giskard Bot Evaluation Log:", visible=False)
+        demo.load(get_logs_file, uid_label, logs, every=0.5)
+    gr.on(triggers=[label.change for label in column_mappings],
+        fn=write_column_mapping_to_config,
+        inputs=[dataset_id_input, dataset_config_input, dataset_split_input, *column_mappings])
+    gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
+        fn=check_model_and_show_prediction,
         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
         outputs=[example_input, example_prediction, column_mapping_accordion, *column_mappings])
     dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
             run_btn.click,
             ],
         fn=try_submit,
+        inputs=[
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
+            run_local,
+            uid_label],
+        outputs=[run_btn, logs])
+    def enable_run_btn():
+        return (gr.update(interactive=True))
+    gr.on(
+        triggers=[
+                model_id_input.change,
+                dataset_config_input.change,
+                dataset_split_input.change,
+                run_inference.change,
+                run_local.change,
+                scanners.change],
+        fn=enable_run_btn,
+        inputs=None,
+        outputs=[run_btn])
+    gr.on(
+        triggers=[label.change for label in column_mappings],
+        fn=enable_run_btn,
+        inputs=None,
         outputs=[run_btn])

fetch_utils.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import huggingface_hub
 import datasets
 import logging



1	import datasets
2	import logging
3

io_utils.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import yaml
 YAML_PATH = "./config.yaml"
 class Dumper(yaml.Dumper):
     def increase_indent(self, flow=False, *args, **kwargs):
@@ -49,14 +52,17 @@ def read_column_mapping(path):
     column_mapping = {}
     with open(path, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
-        column_mapping = config.get("column_mapping", dict())
     return column_mapping
 # write column mapping to yaml file
 def write_column_mapping(mapping):
     with open(YAML_PATH, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
-    if mapping is None:
         del config["column_mapping"]
     else:
         config["column_mapping"] = mapping
@@ -71,3 +77,54 @@ def convert_column_mapping_to_json(df, label=""):
     for _, row in df.iterrows():
         column_mapping[label].append(row.tolist())
     return column_mapping

 import yaml
+import subprocess
+import os
 YAML_PATH = "./config.yaml"
+PIPE_PATH = "./tmp/pipe"
 class Dumper(yaml.Dumper):
     def increase_indent(self, flow=False, *args, **kwargs):
     column_mapping = {}
     with open(path, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+        if config:
+            column_mapping = config.get("column_mapping", dict())
     return column_mapping
 # write column mapping to yaml file
 def write_column_mapping(mapping):
     with open(YAML_PATH, "r") as f:
         config = yaml.load(f, Loader=yaml.FullLoader)
+    if config is None:
+        return
+    if mapping is None and "column_mapping" in config.keys():
         del config["column_mapping"]
     else:
         config["column_mapping"] = mapping
     for _, row in df.iterrows():
         column_mapping[label].append(row.tolist())
     return column_mapping
+def get_logs_file(uid):
+    try:
+        file = open(f"./tmp/{uid}_log", "r")
+        return file.read()
+    except Exception:
+        return "Log file does not exist"
+def write_log_to_user_file(id, log):
+    with open(f"./tmp/{id}_log", "a") as f:
+        f.write(log)
+def save_job_to_pipe(id, job, lock):
+    if not os.path.exists('./tmp'):
+        os.makedirs('./tmp')
+    job = [str(i) for i in job]
+    job = ",".join(job)
+    print(job)
+    with lock:
+        with open(PIPE_PATH, "a") as f:
+            # write each element in job
+            f.write(f'{id}@{job}\n')
+def pop_job_from_pipe():
+    if not os.path.exists(PIPE_PATH):
+        return
+    with open(PIPE_PATH, "r") as f:
+        job = f.readline().strip()
+        remaining = f.readlines()
+        f.close()
+    print(job, remaining, ">>>>")
+    with open(PIPE_PATH, "w") as f:
+        f.write("\n".join(remaining))
+        f.close()
+    if len(job) == 0:
+        return
+    job_info = job.split('\n')[0].split("@")
+    if len(job_info) != 2:
+        raise ValueError("Invalid job info: ", job_info)
+    write_log_to_user_file(job_info[0], f"Running job {job_info}")
+    command = job_info[1].split(",")
+    write_log_to_user_file(job_info[0], f"Running command {command}")
+    log_file = open(f"./tmp/{job_info[0]}_log", "a")
+    subprocess.Popen(
+        command,
+        cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
+        stdout=log_file,
+        stderr=log_file,
+    )

run_jobs.py ADDED Viewed

	@@ -0,0 +1,29 @@

+from io_utils import pop_job_from_pipe
+import time
+import threading
+def start_process_run_job():
+    try:
+        print("Running jobs in thread")
+        global thread
+        thread = threading.Thread(target=run_job)
+        thread.daemon = True
+        thread.do_run = True
+        thread.start()
+    except Exception as e:
+        print("Failed to start thread: ", e)
+def stop_thread():
+    print("Stop thread")
+    thread.do_run = False
+def run_job():
+    while True:
+        print(thread.do_run)
+        try:
+            pop_job_from_pipe()
+            time.sleep(10)
+        except KeyboardInterrupt:
+            print("KeyboardInterrupt stop background thread")
+            stop_thread()
+            break

text_classification_ui_helpers.py ADDED Viewed

	@@ -0,0 +1,181 @@

+import gradio as gr
+from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW
+import json
+import os
+import logging
+import threading
+from io_utils import read_column_mapping, write_column_mapping, save_job_to_pipe, write_log_to_user_file
+import datasets
+import collections
+from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction
+from transformers.pipelines import TextClassificationPipeline
+MAX_LABELS = 20
+MAX_FEATURES = 20
+HF_REPO_ID = 'HF_REPO_ID'
+HF_SPACE_ID = 'SPACE_ID'
+HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
+CONFIG_PATH = "./config.yaml"
+def check_dataset_and_get_config(dataset_id):
+    try:
+        write_column_mapping(None)
+        configs = datasets.get_dataset_config_names(dataset_id)
+        return gr.Dropdown(configs, value=configs[0], visible=True)
+    except Exception:
+        # Dataset may not exist
+        pass
+def check_dataset_and_get_split(dataset_id, dataset_config):
+    try:
+        splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
+        return gr.Dropdown(splits, value=splits[0], visible=True)
+    except Exception:
+        # Dataset may not exist
+        # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+        pass
+def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
+    ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+    if labels is None:
+        return
+    labels = [*labels]
+    all_mappings = read_column_mapping(CONFIG_PATH)
+    if all_mappings is None:
+        all_mappings = dict()
+    if "labels" not in all_mappings.keys():
+        all_mappings["labels"] = dict()
+    for i, label in enumerate(labels[:MAX_LABELS]):
+        if label:
+            all_mappings["labels"][label] = ds_labels[i]
+    if "features" not in all_mappings.keys():
+        all_mappings["features"] = dict()
+    for i, feat in enumerate(labels[MAX_LABELS:(MAX_LABELS + MAX_FEATURES)]):
+        if feat:
+            all_mappings["features"][feat] = ds_features[i]
+    write_column_mapping(all_mappings)
+def list_labels_and_features_from_dataset(ds_labels, ds_features, model_id2label):
+    model_labels = list(model_id2label.values())
+    len_model_labels = len(model_labels)
+    print(model_labels, model_id2label, 3%len_model_labels)
+    lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i%len_model_labels], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
+    lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
+    # TODO: Substitute 'text' with more features for zero-shot
+    features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in ['text']]
+    features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
+    return lables + features
+def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
+    ppl = check_model(model_id)
+    if ppl is None or not isinstance(ppl, TextClassificationPipeline):
+        gr.Warning("Please check your model.")
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+        )
+    dropdown_placement = [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+    if ppl is None: # pipeline not found
+        gr.Warning("Model not found")
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False, open=False),
+            *dropdown_placement
+        )
+    model_id2label = ppl.model.config.id2label
+    ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+    # when dataset does not have labels or features
+    if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
+        # gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False, open=False),
+            *dropdown_placement
+        )
+    column_mappings = list_labels_and_features_from_dataset(
+        ds_labels,
+        ds_features,
+        model_id2label,
+    )
+    # when labels or features are not aligned
+    # show manually column mapping
+    if collections.Counter(model_id2label.values()) != collections.Counter(ds_labels) or ds_features[0] != 'text':
+        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+        return (
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=True, open=True),
+            *column_mappings
+        )
+    prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
+    return (
+        gr.update(value=prediction_input, visible=True),
+        gr.update(value=prediction_output, visible=True),
+        gr.update(visible=True, open=False),
+        *column_mappings
+    )
+def try_submit(m_id, d_id, config, split, local, uid):
+    all_mappings = read_column_mapping(CONFIG_PATH)
+    if all_mappings is None:
+        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+        return (gr.update(interactive=True), gr.update(visible=False))
+    if "labels" not in all_mappings.keys():
+        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+        return (gr.update(interactive=True), gr.update(visible=False))
+    label_mapping = all_mappings["labels"]
+    if "features" not in all_mappings.keys():
+        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+        return (gr.update(interactive=True), gr.update(visible=False))
+    feature_mapping = all_mappings["features"]
+    # TODO: Set column mapping for some dataset such as `amazon_polarity`
+    if local:
+        command = [
+            "python",
+            "cli.py",
+            "--loader", "huggingface",
+            "--model", m_id,
+            "--dataset", d_id,
+            "--dataset_config", config,
+            "--dataset_split", split,
+            "--hf_token", os.environ.get(HF_WRITE_TOKEN),
+            "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+            "--output_format", "markdown",
+            "--output_portal", "huggingface",
+            "--feature_mapping", json.dumps(feature_mapping),
+            "--label_mapping", json.dumps(label_mapping),
+            "--scan_config", "../config.yaml",
+        ]
+        eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+        logging.info(f"Start local evaluation on {eval_str}")
+        save_job_to_pipe(uid, command, threading.Lock())
+        write_log_to_user_file(uid, f"Start local evaluation on {eval_str}. Please wait for your job to start...\n")
+        gr.Info(f"Start local evaluation on {eval_str}")
+        return (
+            gr.update(interactive=False),
+            gr.update(lines=5, visible=True, interactive=False))
+    else:
+        gr.Info("TODO: Submit task to an endpoint")
+    return (gr.update(interactive=True),  # Submit button
+            gr.update(visible=False))

tmp/pipe ADDED Viewed

File without changes

wordings.py CHANGED Viewed

@@ -8,7 +8,7 @@ CONFIRM_MAPPING_DETAILS_MD = '''
                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
-                            Please confirm the pre-processing details below. If you are not sure, please double check your model and dataset.
                             '''
 CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
                             <h1 style="text-align: center;">

                             <h1 style="text-align: center;">
                             Confirm Pre-processing Details
                             </h1>
+                            Please confirm the pre-processing details below. Align the column names of your model in the <b>dropdown</b> menu to your dataset's. If you are not sure, please double check your model and dataset.
                             '''
 CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
                             <h1 style="text-align: center;">