Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
change belebele task
Browse files
- app.py +4 -0
- src/about.py +2 -2
- src/leaderboard/read_evals.py +4 -1
app.py
CHANGED
@@ -68,6 +68,10 @@ leaderboard_df = original_df.copy()
|
|
68 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
69 |
|
70 |
def style_df(df: pd.DataFrame) -> Styler:
|
|
|
|
|
|
|
|
|
71 |
leaderboard_df_styled = df.style.background_gradient(cmap="viridis")
|
72 |
rounding = {'#Params (B)': "{:.1f}"}
|
73 |
for task in Tasks:
|
|
|
68 |
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
|
69 |
|
70 |
def style_df(df: pd.DataFrame) -> Styler:
|
71 |
+
# new_df = df.copy(deep=True)
|
72 |
+
# new_df['polish_poleval2018_task3_test_10k'] = -new_df['polish_poleval2018_task3_test_10k']
|
73 |
+
# new_df = new_df.to_frame()
|
74 |
+
|
75 |
leaderboard_df_styled = df.style.background_gradient(cmap="viridis")
|
76 |
rounding = {'#Params (B)': "{:.1f}"}
|
77 |
for task in Tasks:
|
src/about.py
CHANGED
@@ -13,14 +13,14 @@ class Task:
|
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
16 |
-
task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice")
|
17 |
task3 = Task("polemo2_in", "exact_match,score-first", "polemo2-in_g", "generate_until")
|
18 |
task4 = Task("polemo2_in_multiple_choice", "acc,none", "polemo2-in_mc", "multiple_choice")
|
19 |
task5 = Task("polemo2_out", "exact_match,score-first", "polemo2-out_g", "generate_until")
|
20 |
task6 = Task("polemo2_out_multiple_choice", "acc,none", "polemo2-out_mc", "multiple_choice")
|
21 |
task7 = Task("polish_8tags_multiple_choice", "acc,none", "8tags_mc", "multiple_choice")
|
22 |
task8 = Task("polish_8tags_regex", "exact_match,score-first", "8tags_g", "generate_until")
|
23 |
-
|
24 |
task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until")
|
25 |
task10 = Task("polish_dyk_multiple_choice", "f1,none", "dyk_mc", "multiple_choice")
|
26 |
task11 = Task("polish_dyk_regex", "f1,score-first", "dyk_g", "generate_until")
|
|
|
13 |
# ---------------------------------------------------
|
14 |
class Tasks(Enum):
|
15 |
# task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
16 |
+
# task2 = Task("belebele_pol_Latn", "acc,none", "belebele_pol_Latn", "multiple_choice")
|
17 |
task3 = Task("polemo2_in", "exact_match,score-first", "polemo2-in_g", "generate_until")
|
18 |
task4 = Task("polemo2_in_multiple_choice", "acc,none", "polemo2-in_mc", "multiple_choice")
|
19 |
task5 = Task("polemo2_out", "exact_match,score-first", "polemo2-out_g", "generate_until")
|
20 |
task6 = Task("polemo2_out_multiple_choice", "acc,none", "polemo2-out_mc", "multiple_choice")
|
21 |
task7 = Task("polish_8tags_multiple_choice", "acc,none", "8tags_mc", "multiple_choice")
|
22 |
task8 = Task("polish_8tags_regex", "exact_match,score-first", "8tags_g", "generate_until")
|
23 |
+
task9a = Task("polish_belebele_mc", "acc,none", "belebele_mc", "multiple_choice")
|
24 |
task9 = Task("polish_belebele_regex", "exact_match,score-first", "belebele_g", "generate_until")
|
25 |
task10 = Task("polish_dyk_multiple_choice", "f1,none", "dyk_mc", "multiple_choice")
|
26 |
task11 = Task("polish_dyk_regex", "f1,score-first", "dyk_g", "generate_until")
|
src/leaderboard/read_evals.py
CHANGED
@@ -376,7 +376,10 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
|
|
376 |
# print('missing_results_for_task', missing_results_for_task)
|
377 |
for task, models in missing_results_for_task.items():
|
378 |
print(f"Missing results for {task} for {len(models)} models")
|
379 |
-
print(" ".join(models))
|
|
|
|
|
|
|
380 |
|
381 |
print(f"Missing metadata for {len(missing_metadata)} models")
|
382 |
for model in missing_metadata:
|
|
|
376 |
# print('missing_results_for_task', missing_results_for_task)
|
377 |
for task, models in missing_results_for_task.items():
|
378 |
print(f"Missing results for {task} for {len(models)} models")
|
379 |
+
# print(" ".join(models))
|
380 |
+
for model in models:
|
381 |
+
print(f'"{model}"')
|
382 |
+
print()
|
383 |
|
384 |
print(f"Missing metadata for {len(missing_metadata)} models")
|
385 |
for model in missing_metadata:
|