0-shot description
- README.md +4 -4
- src/about.py +1 -1
- src/leaderboard/read_evals.py +2 -1
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Open PL LLM Leaderboard
+emoji: 🏆🇵🇱
+colorFrom: gray
+colorTo: red
 sdk: gradio
 sdk_version: 4.4.0
 app_file: app.py
src/about.py CHANGED
@@ -37,7 +37,7 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">Open PL LLM Leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title">Open PL LLM Leaderboard (0-shot)</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
src/leaderboard/read_evals.py CHANGED
@@ -12,6 +12,7 @@ from src.display.formatting import make_clickable_model
 from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
 
+NUM_FEWSHOT = 0
 
 @dataclass
 class EvalResult:
@@ -73,7 +74,7 @@ class EvalResult:
             task = task.value
 
             # We average all scores of a given metric (not all metrics are present in all files)
-            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) ==
+            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == NUM_FEWSHOT])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
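For readers unfamiliar with the harness output, here is a minimal, self-contained sketch of what the changed averaging line does: a score is kept only when the benchmark name matches and the run's recorded shot count equals NUM_FEWSHOT (0 for this leaderboard). The benchmark names, metric key, and score values below are made up for illustration; only the filtering expression mirrors the line added in src/leaderboard/read_evals.py.

import numpy as np

NUM_FEWSHOT = 0  # this leaderboard only accepts 0-shot results

# Hypothetical result-file contents in the shape read_evals.py expects:
# "results" maps benchmark name -> metric dict; n_shot maps benchmark name -> shot count.
data = {
    "results": {
        "polish_benchmark_a": {"acc": 0.61},  # made-up 0-shot run
        "polish_benchmark_b": {"acc": 0.55},  # made-up 5-shot run
    }
}
n_shot = {"polish_benchmark_a": 0, "polish_benchmark_b": 5}

benchmark, metric = "polish_benchmark_a", "acc"  # stand-ins for task.benchmark / task.metric

# Same filter as the added line: keep a score only if the benchmark matches
# and the recorded shot count equals NUM_FEWSHOT.
accs = np.array([
    v.get(metric, None)
    for k, v in data["results"].items()
    if benchmark == k and n_shot.get(k, -1) == NUM_FEWSHOT
])

if accs.size == 0 or any(acc is None for acc in accs):
    print("no 0-shot score recorded for this benchmark")  # the real code skips the task
else:
    print(f"mean accuracy: {np.mean(accs) * 100.0:.2f}%")  # prints 61.00% for the data above

With the shot count held in the new NUM_FEWSHOT constant rather than inline in the comprehension, changing the leaderboard's few-shot setting later only requires editing a single value.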