djstrong committed on
Commit b5aa7e1
1 Parent(s): d488d58

change bele task

Files changed (3):
  1. app.py +4 -0
  2. src/about.py +2 -2
  3. src/leaderboard/read_evals.py +4 -1
app.py CHANGED
@@ -68,6 +68,10 @@ leaderboard_df = original_df.copy()
 ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
 def style_df(df: pd.DataFrame) -> Styler:
+    # new_df = df.copy(deep=True)
+    # new_df['polish_poleval2018_task3_test_10k'] = -new_df['polish_poleval2018_task3_test_10k']
+    # new_df = new_df.to_frame()
+
     leaderboard_df_styled = df.style.background_gradient(cmap="viridis")
     rounding = {'#Params (B)': "{:.1f}"}
     for task in Tasks:
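The commented-out lines above hint at flipping the sign of a column that appears to be a lower-is-better score (polish_poleval2018_task3_test_10k) before styling. A minimal sketch of an alternative, assuming the column names shown in the diff and pandas 1.3+: Styler.background_gradient takes a gmap argument, so the gradient can be reversed for that one column without touching the displayed values.

import pandas as pd

# Illustrative data only; column names come from the diff above, values are made up.
df = pd.DataFrame({
    "#Params (B)": [7.0, 13.0, 70.0],
    "belebele_mc": [0.41, 0.55, 0.73],                          # higher is better
    "polish_poleval2018_task3_test_10k": [310.2, 180.5, 95.1],  # lower is better
})

styled = df.style.background_gradient(cmap="viridis")
# Reverse the gradient for the lower-is-better column by passing a negated
# gradient map; the cell values themselves stay unchanged.
styled = styled.background_gradient(
    cmap="viridis",
    subset=["polish_poleval2018_task3_test_10k"],
    gmap=-df["polish_poleval2018_task3_test_10k"],
)
styled = styled.format({"#Params (B)": "{:.1f}"})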
src/about.py CHANGED
@@ -13,14 +13,14 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice")
+    # task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice")
     task3 = Task("polemo2_in", "exact_match,score-first", "polemo2-in_g", "generate_until")
     task4 = Task("polemo2_in_multiple_choice", "acc,none", "polemo2-in_mc", "multiple_choice")
     task5 = Task("polemo2_out", "exact_match,score-first", "polemo2-out_g", "generate_until")
     task6 = Task("polemo2_out_multiple_choice", "acc,none", "polemo2-out_mc", "multiple_choice")
     task7 = Task("polish_8tags_multiple_choice", "acc,none", "8tags_mc", "multiple_choice")
     task8 = Task("polish_8tags_regex", "exact_match,score-first", "8tags_g", "generate_until")
-    #task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice")
+    task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice")
     task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until")
     task10 = Task("polish_dyk_multiple_choice", "f1,none", "dyk_mc", "multiple_choice")
     task11 = Task("polish_dyk_regex", "f1,score-first", "dyk_g", "generate_until")
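For context, a minimal sketch of the structure this diff edits: the commit swaps the English-keyed belebele_pol_Latn entry for the polish_belebele_mc multiple-choice variant, which then sits alongside the generative belebele task. The Task field names below (benchmark, metric, col_name, task_type) are assumptions modelled on the standard leaderboard template, not taken from this repository.

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str   # task key in the results json
    metric: str      # metric key in the results json
    col_name: str    # column name displayed on the leaderboard
    task_type: str   # "multiple_choice" or "generate_until"

class Tasks(Enum):
    # Only the two belebele entries touched by this commit are shown here.
    task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice")
    task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until")

# Downstream code (e.g. style_df in app.py) iterates the enum, so the new
# belebele_mc column is picked up automatically.
for task in Tasks:
    print(task.value.col_name, task.value.task_type)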
src/leaderboard/read_evals.py CHANGED
@@ -376,7 +376,10 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
     # print('missing_results_for_task', missing_results_for_task)
     for task, models in missing_results_for_task.items():
         print(f"Missing results for {task} for {len(models)} models")
-        print(" ".join(models))
+        # print(" ".join(models))
+        for model in models:
+            print(f'"{model}"')
+        print()
 
     print(f"Missing metadata for {len(missing_metadata)} models")
     for model in missing_metadata:
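The logging change is easier to see with sample data: each missing model now lands on its own quoted line instead of one space-joined string, which makes the list easy to copy-paste. A self-contained sketch with made-up model names:

missing_results_for_task = {
    "polish_belebele_mc": ["org-a/model-x", "org-b/model-y"],  # hypothetical examples
}

for task, models in missing_results_for_task.items():
    print(f"Missing results for {task} for {len(models)} models")
    # Previously the whole list was printed on one line: print(" ".join(models))
    for model in models:
        print(f'"{model}"')
    print()

# Prints:
# Missing results for polish_belebele_mc for 2 models
# "org-a/model-x"
# "org-b/model-y"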