Klaudia Thellmann committed on
Commit
9903f37
2 Parent(s): 57ec188 3cf41e9

Merge pull request #11 from OpenGPTX/fix/belebele_fewshot

Browse files

Don't show belebele in fewshot mode; Don't show truthfulqa in zero shot setting; Remove Plots

Files changed (2) hide show
  1. app.py +1 -14
  2. core.py +9 -57
app.py CHANGED
@@ -104,8 +104,6 @@ with demo:
104
  id=1,
105
  ) as misc:
106
  leaderboard_table_misc = gr.Dataframe()
107
- with gr.TabItem("Plots", elem_id="llm-plot-tab", id=2) as plot:
108
- leaderboard_plot = gr.Plot(elem_id="plot")
109
  acc.select(
110
  lambda x: core.update_tab_tasks(0, x),
111
  inputs=fewshot,
@@ -133,11 +131,7 @@ with demo:
133
  [shown_tasks, search_bar, langs_bar, model_types, fewshot],
134
  leaderboard_table_misc,
135
  )
136
- getattr(comp, fn)(
137
- core.update_plot,
138
- [shown_tasks, search_bar, langs_bar, model_types, fewshot],
139
- leaderboard_plot,
140
- )
141
 
142
  gr.Blocks.load(
143
  block=demo,
@@ -153,11 +147,4 @@ with demo:
153
  outputs=leaderboard_table_misc,
154
  )
155
 
156
- gr.Blocks.load(
157
- block=demo,
158
- fn=core.update_plot,
159
- inputs=[shown_tasks, search_bar, langs_bar, model_types, fewshot],
160
- outputs=leaderboard_plot,
161
- )
162
-
163
  demo.launch()
 
104
  id=1,
105
  ) as misc:
106
  leaderboard_table_misc = gr.Dataframe()
 
 
107
  acc.select(
108
  lambda x: core.update_tab_tasks(0, x),
109
  inputs=fewshot,
 
131
  [shown_tasks, search_bar, langs_bar, model_types, fewshot],
132
  leaderboard_table_misc,
133
  )
134
+
 
 
 
 
135
 
136
  gr.Blocks.load(
137
  block=demo,
 
147
  outputs=leaderboard_table_misc,
148
  )
149
 
 
 
 
 
 
 
 
150
  demo.launch()
core.py CHANGED
@@ -10,8 +10,8 @@ from datasets import load_dataset
10
  import style
11
 
12
  TAB_STATE = 0 # FIXME
13
- GSM8K_TASK_GROUP_NAME = "GSM8K" # FIXME
14
- BELEBELE_TASK_GROUP_NAME = "BELEBELE" # FIXME
15
 
16
 
17
  def init():
@@ -45,10 +45,6 @@ def init():
45
 
46
  def sort_cols(df: pd.DataFrame, fewshot: bool = False) -> pd.DataFrame:
47
  task_cols = get_task_columns(df)
48
- if fewshot:
49
- renamer = {col: f"{col} ({task_groups_shots_dict[col]}-shot)" for col in task_cols if col in task_groups_shots_dict}
50
- df.rename(columns=renamer, inplace=True)
51
- task_cols = renamer.values()
52
  return df.reindex(["Type", "Model_Name", "Average"] + sorted(task_cols), axis=1)
53
 
54
 
@@ -125,59 +121,24 @@ def update_df(
125
  df = filter_type(df, model_types)
126
 
127
  if format:
128
- return sort_cols(df, fewshot).style.format(precision=2, decimal=".")
129
  else:
130
  return sort_cols(df, fewshot)
131
 
132
 
133
- def make_plot(df: pd.DataFrame):
134
- df.columns = df.loc["Model_Name"]
135
- df = df.drop("Model_Name")
136
- df = df.reset_index(names="task")
137
- if len(df.columns) > 2:
138
- fig = px.line(data_frame=df, x="task", y=df.columns, markers=True, width=1200)
139
- else:
140
- fig = px.bar(data_frame=df, x="task", y=df.columns[-1], width=1200)
141
- fig.update_xaxes(type="category")
142
- return fig
143
-
144
-
145
- def update_plot(
146
- tasks: list[str],
147
- model_query: str,
148
- langs: list[str],
149
- model_types: list[str],
150
- fewshot: bool = False,
151
- ):
152
- df = update_df(tasks, model_query, langs, model_types, fewshot, False).transpose()
153
- plot = make_plot(df)
154
- return plot
155
-
156
-
157
  def fix_zeroshot(tasks: list[str | int | float], fewshot: bool = False):
158
  global TAB_STATE
159
  selected_task_type = get_selected_task_type(TAB_STATE)
160
  choices = task_groups_with_task_type(selected_task_type)
161
  if not fewshot:
162
- try:
163
- choices.remove(GSM8K_TASK_GROUP_NAME)
164
- except ValueError:
165
- pass
166
- if TAB_STATE == 0:
167
- value = [v for v in tasks if v in choices]
168
- if BELEBELE_TASK_GROUP_NAME not in value:
169
- value += [BELEBELE_TASK_GROUP_NAME]
170
- elif TAB_STATE == 1:
171
- value = [v for v in tasks if v in choices]
172
  else:
173
- try:
174
- choices.remove(BELEBELE_TASK_GROUP_NAME)
175
- except ValueError:
176
- pass
177
  if TAB_STATE == 0:
 
178
  value = [v for v in tasks if v in choices]
179
- if GSM8K_TASK_GROUP_NAME not in value:
180
- value += [GSM8K_TASK_GROUP_NAME]
181
  elif TAB_STATE == 1:
182
  value = [v for v in tasks if v in choices]
183
  shown_tasks = gr.CheckboxGroup(
@@ -198,16 +159,7 @@ def update_tab_tasks(id: int, fewshot: bool = False):
198
  selected_task_type = get_selected_task_type(TAB_STATE)
199
  choices = task_groups_with_task_type(selected_task_type)
200
  if not fewshot:
201
- try:
202
- choices.remove(GSM8K_TASK_GROUP_NAME)
203
- except ValueError:
204
- pass
205
- else:
206
- try:
207
- choices.remove(BELEBELE_TASK_GROUP_NAME)
208
- except ValueError:
209
- pass
210
-
211
  values = choices.copy()
212
  shown_tasks = gr.CheckboxGroup(
213
  choices=choices,
 
10
  import style
11
 
12
  TAB_STATE = 0 # FIXME
13
+ NO_FEWSHOT = ["BELEBELE"] # FIXME
14
+ NO_ZEROSHOT = ["GSM8K", "TruthfulQA"] # FIXME
15
 
16
 
17
  def init():
 
45
 
46
  def sort_cols(df: pd.DataFrame, fewshot: bool = False) -> pd.DataFrame:
47
  task_cols = get_task_columns(df)
 
 
 
 
48
  return df.reindex(["Type", "Model_Name", "Average"] + sorted(task_cols), axis=1)
49
 
50
 
 
121
  df = filter_type(df, model_types)
122
 
123
  if format:
124
+ return sort_cols(df, fewshot).style.format(precision=2, decimal=".", na_rep="N/A")
125
  else:
126
  return sort_cols(df, fewshot)
127
 
128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  def fix_zeroshot(tasks: list[str | int | float], fewshot: bool = False):
130
  global TAB_STATE
131
  selected_task_type = get_selected_task_type(TAB_STATE)
132
  choices = task_groups_with_task_type(selected_task_type)
133
  if not fewshot:
134
+ choices = [c for c in choices if c not in NO_ZEROSHOT]
135
+ value = [v for v in tasks if v in choices]
136
+ value += [t for t in NO_FEWSHOT if t not in value]
 
 
 
 
 
 
 
137
  else:
 
 
 
 
138
  if TAB_STATE == 0:
139
+ choices = [c for c in choices if c not in NO_FEWSHOT]
140
  value = [v for v in tasks if v in choices]
141
+ value += [t for t in NO_ZEROSHOT if t not in value]
 
142
  elif TAB_STATE == 1:
143
  value = [v for v in tasks if v in choices]
144
  shown_tasks = gr.CheckboxGroup(
 
159
  selected_task_type = get_selected_task_type(TAB_STATE)
160
  choices = task_groups_with_task_type(selected_task_type)
161
  if not fewshot:
162
+ choices = [c for c in choices if c not in NO_ZEROSHOT]
 
 
 
 
 
 
 
 
 
163
  values = choices.copy()
164
  shown_tasks = gr.CheckboxGroup(
165
  choices=choices,