Klaudia Thellmann committed on
Commit
d541259
2 Parent(s): 9903f37 625e239

Merge pull request #12 from OpenGPTX/fix/belebele_fewshot

Browse files

Refactorings and fixes for tab handling and few-shot selection

Files changed (2) hide show
  1. app.py +40 -35
  2. core.py +31 -66
app.py CHANGED
@@ -12,6 +12,8 @@ with demo:
12
  elem_classes="markdown-text",
13
  )
14
 
 
 
15
  with gr.Column():
16
  with gr.Row():
17
  with gr.Column():
@@ -67,10 +69,9 @@ with demo:
67
  select.click(update_bar, inputs=[], outputs=langs_bar)
68
 
69
  with gr.Row():
70
- acc_task_group_names = core.task_groups_with_task_type("accuracy")
71
  shown_tasks = gr.CheckboxGroup(
72
- choices=acc_task_group_names,
73
- value=acc_task_group_names,
74
  label="Select tasks to show",
75
  elem_id="column-select",
76
  interactive=True,
@@ -80,15 +81,8 @@ with demo:
80
  choices=[("0-Shot", False), ("Few-shot", True)],
81
  value=True,
82
  label="Select evaluation type",
83
- interactive=True,
84
  scale=29,
85
  )
86
- demo.load(
87
- core.fix_zeroshot, [shown_tasks, fewshot], shown_tasks
88
- )
89
- fewshot.change(
90
- core.fix_zeroshot, [shown_tasks, fewshot], shown_tasks
91
- )
92
  clear = gr.ClearButton(
93
  shown_tasks, value="Deselect all tasks", size="sm", scale=21
94
  )
@@ -104,33 +98,44 @@ with demo:
104
  id=1,
105
  ) as misc:
106
  leaderboard_table_misc = gr.Dataframe()
107
- acc.select(
108
- lambda x: core.update_tab_tasks(0, x),
109
- inputs=fewshot,
110
- outputs=[shown_tasks, fewshot],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  )
112
- misc.select(
113
- lambda x: core.update_tab_tasks(1, x),
114
- inputs=fewshot,
115
- outputs=[shown_tasks, fewshot],
116
  )
117
- for comp, fn in [
118
- (search_bar, "submit"),
119
- (langs_bar, "change"),
120
- (shown_tasks, "change"),
121
- (fewshot, "change"),
122
- (model_types, "change"),
123
- ]:
124
- getattr(comp, fn)(
125
- core.update_df,
126
- [shown_tasks, search_bar, langs_bar, model_types, fewshot],
127
- leaderboard_table,
128
- )
129
- getattr(comp, fn)(
130
- core.update_df,
131
- [shown_tasks, search_bar, langs_bar, model_types, fewshot],
132
- leaderboard_table_misc,
133
- )
134
 
135
 
136
  gr.Blocks.load(
 
12
  elem_classes="markdown-text",
13
  )
14
 
15
+ selected_tab = gr.State(value=0)
16
+
17
  with gr.Column():
18
  with gr.Row():
19
  with gr.Column():
 
69
  select.click(update_bar, inputs=[], outputs=langs_bar)
70
 
71
  with gr.Row():
 
72
  shown_tasks = gr.CheckboxGroup(
73
+ choices=[],
74
+ value=[],
75
  label="Select tasks to show",
76
  elem_id="column-select",
77
  interactive=True,
 
81
  choices=[("0-Shot", False), ("Few-shot", True)],
82
  value=True,
83
  label="Select evaluation type",
 
84
  scale=29,
85
  )
 
 
 
 
 
 
86
  clear = gr.ClearButton(
87
  shown_tasks, value="Deselect all tasks", size="sm", scale=21
88
  )
 
98
  id=1,
99
  ) as misc:
100
  leaderboard_table_misc = gr.Dataframe()
101
+
102
+ demo.load(
103
+ core.update_task_groups_and_fewshot,
104
+ [gr.State(value=0), fewshot],
105
+ [shown_tasks, fewshot, selected_tab],
106
+ )
107
+ fewshot.change(
108
+ core.update_task_groups_and_fewshot,
109
+ [selected_tab, fewshot],
110
+ [shown_tasks, fewshot, selected_tab],
111
+ )
112
+ acc.select(
113
+ core.update_task_groups_and_fewshot,
114
+ inputs=[gr.State(value=0), fewshot],
115
+ outputs=[shown_tasks, fewshot, selected_tab],
116
+ )
117
+ misc.select(
118
+ core.update_task_groups_and_fewshot,
119
+ inputs=[gr.State(value=1), fewshot],
120
+ outputs=[shown_tasks, fewshot, selected_tab],
121
+ )
122
+ for comp, fn in [
123
+ (search_bar, "submit"),
124
+ (langs_bar, "change"),
125
+ (shown_tasks, "change"),
126
+ (fewshot, "change"),
127
+ (model_types, "change"),
128
+ ]:
129
+ getattr(comp, fn)(
130
+ core.update_df,
131
+ [shown_tasks, search_bar, langs_bar, model_types, fewshot],
132
+ leaderboard_table,
133
  )
134
+ getattr(comp, fn)(
135
+ core.update_df,
136
+ [shown_tasks, search_bar, langs_bar, model_types, fewshot],
137
+ leaderboard_table_misc,
138
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
 
141
  gr.Blocks.load(
core.py CHANGED
@@ -9,9 +9,8 @@ from datasets import load_dataset
9
 
10
  import style
11
 
12
- TAB_STATE = 0 # FIXME
13
- NO_FEWSHOT = ["BELEBELE"] # FIXME
14
- NO_ZEROSHOT = ["GSM8K", "TruthfulQA"] # FIXME
15
 
16
 
17
  def init():
@@ -126,68 +125,29 @@ def update_df(
126
  return sort_cols(df, fewshot)
127
 
128
 
129
- def fix_zeroshot(tasks: list[str | int | float], fewshot: bool = False):
130
- global TAB_STATE
131
- selected_task_type = get_selected_task_type(TAB_STATE)
132
- choices = task_groups_with_task_type(selected_task_type)
133
- if not fewshot:
134
- choices = [c for c in choices if c not in NO_ZEROSHOT]
135
- value = [v for v in tasks if v in choices]
136
- value += [t for t in NO_FEWSHOT if t not in value]
137
- else:
138
- if TAB_STATE == 0:
139
- choices = [c for c in choices if c not in NO_FEWSHOT]
140
- value = [v for v in tasks if v in choices]
141
- value += [t for t in NO_ZEROSHOT if t not in value]
142
- elif TAB_STATE == 1:
143
- value = [v for v in tasks if v in choices]
144
- shown_tasks = gr.CheckboxGroup(
145
- choices=choices,
146
- value=value,
147
- label="Select tasks to show",
148
- elem_id="column-select",
149
- interactive=True,
150
- scale=50,
151
  )
152
- return shown_tasks
153
-
154
-
155
- def update_tab_tasks(id: int, fewshot: bool = False):
156
- # when the tab is changed, update the TAB_STATE accordingly
157
- global TAB_STATE
158
- TAB_STATE = id
159
- selected_task_type = get_selected_task_type(TAB_STATE)
160
- choices = task_groups_with_task_type(selected_task_type)
161
- if not fewshot:
162
- choices = [c for c in choices if c not in NO_ZEROSHOT]
163
- values = choices.copy()
164
- shown_tasks = gr.CheckboxGroup(
165
- choices=choices,
166
- value=values,
167
- label="Select tasks to show",
168
- elem_id="column-select",
169
- interactive=True,
170
- scale=50,
171
  )
172
- if id == 0:
173
- # switching to accuracy tab, default to fewshot
174
- fewshot = gr.Radio(
175
- choices=[("0-Shot", False), ("Few-shot", True)],
176
- value=True,
177
- label="Select evaluation type",
178
- interactive=True,
179
- scale=29,
180
- )
181
- elif id == 1:
182
- # switching to translation tab, default to 0-shot and disable selection
183
- fewshot = gr.Radio(
184
- choices=[("0-Shot", False), ("Few-shot", True)],
185
- value=False,
186
- label="Select evaluation type",
187
- interactive=False,
188
- scale=29,
189
- )
190
- return [shown_tasks, fewshot]
191
 
192
 
193
  def get_selected_task_type(task_type_id):
@@ -196,10 +156,15 @@ def get_selected_task_type(task_type_id):
196
  return selected_task_type
197
 
198
 
199
- def task_groups_with_task_type(selected_task_type):
200
- choices = [task_group_name for task_group_name, task_type in task_group_type_dict.items() if task_type == selected_task_type]
 
 
 
 
 
201
 
202
- return choices
203
 
204
 
205
  init()
 
9
 
10
  import style
11
 
12
+ ZERO_SHOT_ONLY = ["BELEBELE"]
13
+ FEW_SHOT_ONLY = ["GSM8K", "TruthfulQA"]
 
14
 
15
 
16
  def init():
 
125
  return sort_cols(df, fewshot)
126
 
127
 
128
+ def update_task_groups_and_fewshot(current_selected_tab: int, is_fewshot_current: bool = False):
129
+ selected_task_type = get_selected_task_type(current_selected_tab)
130
+ available_tasks = get_available_task_groups(selected_task_type, is_fewshot_current)
131
+ new_selected_tasks = available_tasks.copy()
132
+
133
+ tasks_checkbox_group_update = gr.CheckboxGroup(
134
+ choices=available_tasks,
135
+ value=new_selected_tasks,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  )
137
+
138
+ if current_selected_tab == 0:
139
+ is_fewshot_new = is_fewshot_current
140
+ fewshot_available = True
141
+ elif current_selected_tab == 1:
142
+ is_fewshot_new = False
143
+ fewshot_available = False
144
+
145
+ fewshot_radio_update = gr.Radio(
146
+ value=is_fewshot_new,
147
+ interactive=fewshot_available,
 
 
 
 
 
 
 
 
148
  )
149
+
150
+ return [tasks_checkbox_group_update, fewshot_radio_update, current_selected_tab]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
 
152
 
153
  def get_selected_task_type(task_type_id):
 
156
  return selected_task_type
157
 
158
 
159
+ def get_available_task_groups(selected_task_type, fewshot):
160
+ task_groups = [task_group_name for task_group_name, task_type in task_group_type_dict.items() if task_type == selected_task_type]
161
+
162
+ if fewshot:
163
+ available_tasks = [c for c in task_groups if c not in ZERO_SHOT_ONLY]
164
+ else:
165
+ available_tasks = [c for c in task_groups if c not in FEW_SHOT_ONLY]
166
 
167
+ return available_tasks
168
 
169
 
170
  init()