mirageco committed on
Commit
ee62fba
1 Parent(s): e9d718d

Display results on the dashboard even when a benchmark result is missing, by filling "missing" into the corresponding column

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +10 -2
src/leaderboard/read_evals.py CHANGED
@@ -86,6 +86,9 @@ class EvalResult:
86
  missing_benchmarks = task_benchmarks - results.keys()
87
  if missing_benchmarks:
88
  print(f"(Missing results) Model {model} is missing {', '.join(missing_benchmarks)} from result files")
 
 
 
89
 
90
 
91
  return self(
@@ -157,11 +160,16 @@ class EvalResult:
157
  # Calculate the mean for each category and add to data_dict
158
  data_dict = {}
159
  for category, scores in category_averages.items():
160
- average = sum(scores) / len(scores) if scores else 0
 
 
 
 
 
161
  data_dict[category] = average
162
 
163
  # Overall average
164
- total_scores = [v for v in self.results.values() if v is not None]
165
  overall_average = sum(total_scores) / len(total_scores) if total_scores else 0
166
 
167
  # Add other columns
 
86
  missing_benchmarks = task_benchmarks - results.keys()
87
  if missing_benchmarks:
88
  print(f"(Missing results) Model {model} is missing {', '.join(missing_benchmarks)} from result files")
89
+ for benchmark in missing_benchmarks:
90
+ results[benchmark] = "missing"
91
+
92
 
93
 
94
  return self(
 
160
  # Calculate the mean for each category and add to data_dict
161
  data_dict = {}
162
  for category, scores in category_averages.items():
163
+ # Calculate the average if there are valid scores, otherwise set to 0
164
+ valid_scores = [score for score in scores if score != "missing"]
165
+ if valid_scores:
166
+ average = sum(valid_scores) / len(valid_scores)
167
+ else:
168
+ average = 0
169
  data_dict[category] = average
170
 
171
  # Overall average
172
+ total_scores = [v for v in self.results.values() if v != "missing"]
173
  overall_average = sum(total_scores) / len(total_scores) if total_scores else 0
174
 
175
  # Add other columns