djstrong committed on
Commit
91765f5
1 Parent(s): 39d6a74

show perplexity for 5-shot too

Files changed (1)
  1. src/leaderboard/read_evals.py +5 -1
src/leaderboard/read_evals.py CHANGED
@@ -95,9 +95,13 @@ class EvalResult:
         for task in Tasks:
             task = task.value
 
+            task_n_shot_num = n_shot_num
+            if 'perplexity' in task.metric:  # perplexity is the same for 0-shot and 5-shot and is calculated only with 0-shot
+                task_n_shot_num = 0
+
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
-                             task.benchmark == k and n_shot.get(k, -1) == n_shot_num])
+                             task.benchmark == k and n_shot.get(k, -1) == task_n_shot_num])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
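For context, here is a minimal runnable sketch of what the fallback does when the 5-shot leaderboard entry is assembled. The benchmark name, result shapes, and n_shot mapping below are hypothetical stand-ins, not the leaderboard's actual data:

import numpy as np

# Hypothetical example data (not from the repo): one perplexity benchmark whose
# score was only produced in the 0-shot run.
data = {"results": {"polish_ppl": {"perplexity": 12.3}}}
n_shot = {"polish_ppl": 0}   # the run that contains the perplexity score is 0-shot
n_shot_num = 5               # we are assembling the 5-shot leaderboard entry

task_metric = "perplexity"
task_benchmark = "polish_ppl"

# Same fallback as the patch: perplexity is identical for 0-shot and 5-shot and
# is only calculated with 0-shot, so look it up under the 0-shot setting instead.
task_n_shot_num = 0 if "perplexity" in task_metric else n_shot_num

accs = np.array([v.get(task_metric, None) for k, v in data["results"].items()
                 if task_benchmark == k and n_shot.get(k, -1) == task_n_shot_num])
print(accs)  # [12.3]; filtering on n_shot_num (5) instead would leave this empty

Without the override, the list comprehension finds no matching 5-shot result, accs comes back empty, and the loop skips the task, which is why perplexity was previously missing from the 5-shot view.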