Spaces:

ParsBench
/

leaderboard

Running

shahriarshm committed 13 days ago

Commit

99879ab

•

2 Parent(s): 0088478 59f2d4b

Merge open llm leaderboard last changes

Files changed (5) hide show

README.md CHANGED Viewed

@@ -43,4 +43,4 @@ If you encounter problem on the space, don't hesitate to restart it to remove th
 You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
-- teh logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`

 You'll find
 - the main table' columns names and properties in `src/display/utils.py`
 - the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
+- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`

requirements.txt CHANGED Viewed

@@ -3,7 +3,7 @@ black
 datasets
 gradio
 gradio[oauth]
-gradio_leaderboard==0.0.9
 gradio_client
 huggingface-hub>=0.18.0
 matplotlib

 datasets
 gradio
 gradio[oauth]
+gradio_leaderboard==0.0.13
 gradio_client
 huggingface-hub>=0.18.0
 matplotlib

src/display/css_html_js.py CHANGED Viewed

@@ -39,8 +39,8 @@ custom_css = """
 }
 /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
-table td:first-child,
-table th:first-child {
     max-width: 400px;
     overflow: auto;
     white-space: nowrap;

 }
 /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
+#leaderboard-table td:nth-child(2),
+#leaderboard-table th:nth-child(2) {
     max-width: 400px;
     overflow: auto;
     white-space: nowrap;

src/display/utils.py CHANGED Viewed

@@ -91,10 +91,6 @@ class WeightType(Enum):
 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
-    float32 = ModelDetails("float32")
-    #qt_8bit = ModelDetails("8bit")
-    #qt_4bit = ModelDetails("4bit")
-    #qt_GPTQ = ModelDetails("GPTQ")
     Unknown = ModelDetails("?")
     def from_str(precision):
@@ -102,14 +98,6 @@ class Precision(Enum):
             return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:
             return Precision.bfloat16
-        if precision in ["float32"]:
-            return Precision.float32
-        #if precision in ["8bit"]:
-        #    return Precision.qt_8bit
-        #if precision in ["4bit"]:
-        #    return Precision.qt_4bit
-        #if precision in ["GPTQ", "None"]:
-        #    return Precision.qt_GPTQ
         return Precision.Unknown
 # Column selection

 class Precision(Enum):
     float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     Unknown = ModelDetails("?")
     def from_str(precision):
             return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:
             return Precision.bfloat16
         return Precision.Unknown
 # Column selection

src/leaderboard/read_evals.py CHANGED Viewed

@@ -190,10 +190,10 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
     results = []
     for v in eval_results.values():
-        # try:
-        v.to_dict() # we test if the dict version is complete
-        results.append(v)
-        # except KeyError:  # not all eval values present
-            # continue
     return results

     results = []
     for v in eval_results.values():
+        try:
+            v.to_dict() # we test if the dict version is complete
+            results.append(v)
+        except KeyError:  # not all eval values present
+            continue
     return results