Spaces:
Running
Running
shahriarshm
committed on
Merge open llm leaderboard last changes
Browse files- README.md +1 -1
- requirements.txt +1 -1
- src/display/css_html_js.py +2 -2
- src/display/utils.py +0 -12
- src/leaderboard/read_evals.py +5 -5
README.md
CHANGED
@@ -43,4 +43,4 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
|
|
43 |
You'll find
|
44 |
- the main table's column names and properties in `src/display/utils.py`
|
45 |
- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
|
46 |
-
-
|
|
|
43 |
You'll find
|
44 |
- the main table's column names and properties in `src/display/utils.py`
|
45 |
- the logic to read all results and request files, then convert them into dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
|
46 |
+
- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`
|
requirements.txt
CHANGED
@@ -3,7 +3,7 @@ black
|
|
3 |
datasets
|
4 |
gradio
|
5 |
gradio[oauth]
|
6 |
-
gradio_leaderboard==0.0.
|
7 |
gradio_client
|
8 |
huggingface-hub>=0.18.0
|
9 |
matplotlib
|
|
|
3 |
datasets
|
4 |
gradio
|
5 |
gradio[oauth]
|
6 |
+
gradio_leaderboard==0.0.13
|
7 |
gradio_client
|
8 |
huggingface-hub>=0.18.0
|
9 |
matplotlib
|
src/display/css_html_js.py
CHANGED
@@ -39,8 +39,8 @@ custom_css = """
|
|
39 |
}
|
40 |
|
41 |
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
|
42 |
-
table td:
|
43 |
-
table th:
|
44 |
max-width: 400px;
|
45 |
overflow: auto;
|
46 |
white-space: nowrap;
|
|
|
39 |
}
|
40 |
|
41 |
/* Limit the width of the first AutoEvalColumn so that names don't expand too much */
|
42 |
+
#leaderboard-table td:nth-child(2),
|
43 |
+
#leaderboard-table th:nth-child(2) {
|
44 |
max-width: 400px;
|
45 |
overflow: auto;
|
46 |
white-space: nowrap;
|
src/display/utils.py
CHANGED
@@ -91,10 +91,6 @@ class WeightType(Enum):
|
|
91 |
class Precision(Enum):
|
92 |
float16 = ModelDetails("float16")
|
93 |
bfloat16 = ModelDetails("bfloat16")
|
94 |
-
float32 = ModelDetails("float32")
|
95 |
-
#qt_8bit = ModelDetails("8bit")
|
96 |
-
#qt_4bit = ModelDetails("4bit")
|
97 |
-
#qt_GPTQ = ModelDetails("GPTQ")
|
98 |
Unknown = ModelDetails("?")
|
99 |
|
100 |
def from_str(precision):
|
@@ -102,14 +98,6 @@ class Precision(Enum):
|
|
102 |
return Precision.float16
|
103 |
if precision in ["torch.bfloat16", "bfloat16"]:
|
104 |
return Precision.bfloat16
|
105 |
-
if precision in ["float32"]:
|
106 |
-
return Precision.float32
|
107 |
-
#if precision in ["8bit"]:
|
108 |
-
# return Precision.qt_8bit
|
109 |
-
#if precision in ["4bit"]:
|
110 |
-
# return Precision.qt_4bit
|
111 |
-
#if precision in ["GPTQ", "None"]:
|
112 |
-
# return Precision.qt_GPTQ
|
113 |
return Precision.Unknown
|
114 |
|
115 |
# Column selection
|
|
|
91 |
class Precision(Enum):
|
92 |
float16 = ModelDetails("float16")
|
93 |
bfloat16 = ModelDetails("bfloat16")
|
|
|
|
|
|
|
|
|
94 |
Unknown = ModelDetails("?")
|
95 |
|
96 |
def from_str(precision):
|
|
|
98 |
return Precision.float16
|
99 |
if precision in ["torch.bfloat16", "bfloat16"]:
|
100 |
return Precision.bfloat16
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
return Precision.Unknown
|
102 |
|
103 |
# Column selection
|
src/leaderboard/read_evals.py
CHANGED
@@ -190,10 +190,10 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
|
|
190 |
|
191 |
results = []
|
192 |
for v in eval_results.values():
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
|
199 |
return results
|
|
|
190 |
|
191 |
results = []
|
192 |
for v in eval_results.values():
|
193 |
+
try:
|
194 |
+
v.to_dict() # we test if the dict version is complete
|
195 |
+
results.append(v)
|
196 |
+
except KeyError: # not all eval values present
|
197 |
+
continue
|
198 |
|
199 |
return results
|