djstrong committed on
Commit
0a713c8
1 Parent(s): 57083a7

in progress private

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +4 -4
src/leaderboard/read_evals.py CHANGED
@@ -452,14 +452,14 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
452
  task_name = f"{r['n_shot']}|{task.value.benchmark}"
453
  if task_name in missing_results_for_task:
454
  missing_results_for_task[task_name].append(f"{v.full_model}|{v.org_and_model}")
455
- if v.still_on_hub and task.value.benchmark in all_tasks:
456
- for_run.append([r["n_shot"], task.value.benchmark, v.full_model])
457
  in_progress=True
458
  # print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
459
  else:
460
  missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
461
- if v.still_on_hub and task.value.benchmark in all_tasks:
462
- for_run.append([r["n_shot"], task.value.benchmark, v.full_model])
463
  in_progress=True
464
  # print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
465
  if in_progress:
 
452
  task_name = f"{r['n_shot']}|{task.value.benchmark}"
453
  if task_name in missing_results_for_task:
454
  missing_results_for_task[task_name].append(f"{v.full_model}|{v.org_and_model}")
455
+ if task.value.benchmark in all_tasks:
456
+ if v.still_on_hub: for_run.append([r["n_shot"], task.value.benchmark, v.full_model])
457
  in_progress=True
458
  # print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
459
  else:
460
  missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
461
+ if task.value.benchmark in all_tasks:
462
+ if v.still_on_hub: for_run.append([r["n_shot"], task.value.benchmark, v.full_model])
463
  in_progress=True
464
  # print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r["n_shot"]} {task.value.benchmark} {v.full_model}"')
465
  if in_progress: