Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Commit
•
6269bd0
1
Parent(s):
1a3f05a
Allow old model metrics
Browse files
src/display/changelog.py
CHANGED
@@ -1,6 +1,10 @@
|
|
1 |
CHANGELOG_TEXT = f"""
|
2 |
# Changes made to the leaderboard
|
3 |
|
4 |
-
### [
|
|
|
|
|
|
|
|
|
5 |
Protype version launched with 7 benchmarks ENEM, BLUEX, OAB Exams, ASSIN 2 RTE and STS, FAQUAD NLI and SPARROW POR
|
6 |
"""
|
|
|
1 |
CHANGELOG_TEXT = f"""
|
2 |
# Changes made to the leaderboard
|
3 |
|
4 |
+
### [1.1.0] - 2024-02-16
|
5 |
+
Removed the Sparrow POR benchmark from the leaderboard because of low quality annotations
|
6 |
+
Added HateBR Offensive, PT Hate Speech and tweetSentBR benchmarks to the leaderboard, started new evaluation queue for these benchmarks
|
7 |
+
|
8 |
+
### [1.0.0] - 2024-02-01
|
9 |
Protype version launched with 7 benchmarks ENEM, BLUEX, OAB Exams, ASSIN 2 RTE and STS, FAQUAD NLI and SPARROW POR
|
10 |
"""
|
src/leaderboard/read_evals.py
CHANGED
@@ -216,7 +216,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
216 |
with open(tmp_request_file, "r") as f:
|
217 |
req_content = json.load(f)
|
218 |
if (
|
219 |
-
req_content["status"] in ["FINISHED"]
|
220 |
and req_content["precision"] == precision.split(".")[-1]
|
221 |
):
|
222 |
request_file = tmp_request_file
|
@@ -262,7 +262,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
|
|
262 |
results = []
|
263 |
for v in eval_results.values():
|
264 |
try:
|
265 |
-
if v.status
|
266 |
v.to_dict() # we test if the dict version is complete
|
267 |
results.append(v)
|
268 |
except KeyError as e: # not all eval values present
|
|
|
216 |
with open(tmp_request_file, "r") as f:
|
217 |
req_content = json.load(f)
|
218 |
if (
|
219 |
+
req_content["status"] in ["FINISHED", "PENDING_NEW_EVAL"]
|
220 |
and req_content["precision"] == precision.split(".")[-1]
|
221 |
):
|
222 |
request_file = tmp_request_file
|
|
|
262 |
results = []
|
263 |
for v in eval_results.values():
|
264 |
try:
|
265 |
+
if v.status in ["FINISHED", "PENDING_NEW_EVAL"] and not v.hidden:
|
266 |
v.to_dict() # we test if the dict version is complete
|
267 |
results.append(v)
|
268 |
except KeyError as e: # not all eval values present
|