Spaces:

openGPT-X
/

european-llm-leaderboard

Running

App Files Community

Klaudia Thellmann committed on Jul 7

Commit

9903f37

•

2 Parent(s): 57ec188 3cf41e9

Merge pull request #11 from OpenGPTX/fix/belebele_fewshot

Browse files

Don't show belebele in fewshot mode; Don't show truthfulqa in zero shot setting; Remove Plots

Files changed (2) hide show

app.py +1 -14
core.py +9 -57

app.py CHANGED Viewed

@@ -104,8 +104,6 @@ with demo:
                 id=1,
             ) as misc:
                 leaderboard_table_misc = gr.Dataframe()
-            with gr.TabItem("Plots", elem_id="llm-plot-tab", id=2) as plot:
-                leaderboard_plot = gr.Plot(elem_id="plot")
             acc.select(
                 lambda x: core.update_tab_tasks(0, x),
                 inputs=fewshot,
@@ -133,11 +131,7 @@ with demo:
                     [shown_tasks, search_bar, langs_bar, model_types, fewshot],
                     leaderboard_table_misc,
                 )
-                getattr(comp, fn)(
-                    core.update_plot,
-                    [shown_tasks, search_bar, langs_bar, model_types, fewshot],
-                    leaderboard_plot,
-                )
     gr.Blocks.load(
         block=demo,
@@ -153,11 +147,4 @@ with demo:
         outputs=leaderboard_table_misc,
     )
-    gr.Blocks.load(
-        block=demo,
-        fn=core.update_plot,
-        inputs=[shown_tasks, search_bar, langs_bar, model_types, fewshot],
-        outputs=leaderboard_plot,
-    )
 demo.launch()

                 id=1,
             ) as misc:
                 leaderboard_table_misc = gr.Dataframe()
             acc.select(
                 lambda x: core.update_tab_tasks(0, x),
                 inputs=fewshot,
                     [shown_tasks, search_bar, langs_bar, model_types, fewshot],
                     leaderboard_table_misc,
                 )
     gr.Blocks.load(
         block=demo,
         outputs=leaderboard_table_misc,
     )
 demo.launch()

core.py CHANGED Viewed

@@ -10,8 +10,8 @@ from datasets import load_dataset
 import style
 TAB_STATE = 0  # FIXME
-GSM8K_TASK_GROUP_NAME = "GSM8K"  # FIXME
-BELEBELE_TASK_GROUP_NAME = "BELEBELE"  # FIXME
 def init():
@@ -45,10 +45,6 @@ def init():
 def sort_cols(df: pd.DataFrame, fewshot: bool = False) -> pd.DataFrame:
     task_cols = get_task_columns(df)
-    if fewshot:
-        renamer = {col: f"{col} ({task_groups_shots_dict[col]}-shot)" for col in task_cols if col in task_groups_shots_dict}
-        df.rename(columns=renamer, inplace=True)
-        task_cols = renamer.values()
     return df.reindex(["Type", "Model_Name", "Average"] + sorted(task_cols), axis=1)
@@ -125,59 +121,24 @@ def update_df(
     df = filter_type(df, model_types)
     if format:
-        return sort_cols(df, fewshot).style.format(precision=2, decimal=".")
     else:
         return sort_cols(df, fewshot)
-def make_plot(df: pd.DataFrame):
-    df.columns = df.loc["Model_Name"]
-    df = df.drop("Model_Name")
-    df = df.reset_index(names="task")
-    if len(df.columns) > 2:
-        fig = px.line(data_frame=df, x="task", y=df.columns, markers=True, width=1200)
-    else:
-        fig = px.bar(data_frame=df, x="task", y=df.columns[-1], width=1200)
-    fig.update_xaxes(type="category")
-    return fig
-def update_plot(
-    tasks: list[str],
-    model_query: str,
-    langs: list[str],
-    model_types: list[str],
-    fewshot: bool = False,
-):
-    df = update_df(tasks, model_query, langs, model_types, fewshot, False).transpose()
-    plot = make_plot(df)
-    return plot
 def fix_zeroshot(tasks: list[str | int | float], fewshot: bool = False):
     global TAB_STATE
     selected_task_type = get_selected_task_type(TAB_STATE)
     choices = task_groups_with_task_type(selected_task_type)
     if not fewshot:
-        try:
-            choices.remove(GSM8K_TASK_GROUP_NAME)
-        except ValueError:
-            pass
-        if TAB_STATE == 0:
-            value = [v for v in tasks if v in choices]
-            if BELEBELE_TASK_GROUP_NAME not in value:
-                value += [BELEBELE_TASK_GROUP_NAME]
-        elif TAB_STATE == 1:
-            value = [v for v in tasks if v in choices]
     else:
-        try:
-            choices.remove(BELEBELE_TASK_GROUP_NAME)
-        except ValueError:
-            pass
         if TAB_STATE == 0:
             value = [v for v in tasks if v in choices]
-            if GSM8K_TASK_GROUP_NAME not in value:
-                value += [GSM8K_TASK_GROUP_NAME]
         elif TAB_STATE == 1:
             value = [v for v in tasks if v in choices]
     shown_tasks = gr.CheckboxGroup(
@@ -198,16 +159,7 @@ def update_tab_tasks(id: int, fewshot: bool = False):
     selected_task_type = get_selected_task_type(TAB_STATE)
     choices = task_groups_with_task_type(selected_task_type)
     if not fewshot:
-        try:
-            choices.remove(GSM8K_TASK_GROUP_NAME)
-        except ValueError:
-            pass
-    else:
-        try:
-            choices.remove(BELEBELE_TASK_GROUP_NAME)
-        except ValueError:
-            pass
     values = choices.copy()
     shown_tasks = gr.CheckboxGroup(
         choices=choices,

 import style
 TAB_STATE = 0  # FIXME
+NO_FEWSHOT = ["BELEBELE"]  # FIXME
+NO_ZEROSHOT = ["GSM8K", "TruthfulQA"]  # FIXME
 def init():
 def sort_cols(df: pd.DataFrame, fewshot: bool = False) -> pd.DataFrame:
     task_cols = get_task_columns(df)
     return df.reindex(["Type", "Model_Name", "Average"] + sorted(task_cols), axis=1)
     df = filter_type(df, model_types)
     if format:
+        return sort_cols(df, fewshot).style.format(precision=2, decimal=".", na_rep="N/A")
     else:
         return sort_cols(df, fewshot)
 def fix_zeroshot(tasks: list[str | int | float], fewshot: bool = False):
     global TAB_STATE
     selected_task_type = get_selected_task_type(TAB_STATE)
     choices = task_groups_with_task_type(selected_task_type)
     if not fewshot:
+        choices = [c for c in choices if c not in NO_ZEROSHOT]
+        value = [v for v in tasks if v in choices]
+        value += [t for t in NO_FEWSHOT if t not in value]
     else:
         if TAB_STATE == 0:
+            choices = [c for c in choices if c not in NO_FEWSHOT]
             value = [v for v in tasks if v in choices]
+            value += [t for t in NO_ZEROSHOT if t not in value]
         elif TAB_STATE == 1:
             value = [v for v in tasks if v in choices]
     shown_tasks = gr.CheckboxGroup(
     selected_task_type = get_selected_task_type(TAB_STATE)
     choices = task_groups_with_task_type(selected_task_type)
     if not fewshot:
+        choices = [c for c in choices if c not in NO_ZEROSHOT]
     values = choices.copy()
     shown_tasks = gr.CheckboxGroup(
         choices=choices,