Adding Evaluation Results
#4
by
mkurman
- opened
README.md
CHANGED
@@ -32,6 +32,9 @@ model-index:
|
|
32 |
- type: inst_level_strict_acc and prompt_level_strict_acc
|
33 |
value: 55.37
|
34 |
name: strict accuracy
|
|
|
|
|
|
|
35 |
source:
|
36 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
37 |
name: Open LLM Leaderboard
|
@@ -50,6 +53,9 @@ model-index:
|
|
50 |
- type: acc_norm
|
51 |
value: 7.17
|
52 |
name: normalized accuracy
|
|
|
|
|
|
|
53 |
source:
|
54 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
55 |
name: Open LLM Leaderboard
|
@@ -68,6 +74,9 @@ model-index:
|
|
68 |
- type: exact_match
|
69 |
value: 1.28
|
70 |
name: exact match
|
|
|
|
|
|
|
71 |
source:
|
72 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
73 |
name: Open LLM Leaderboard
|
@@ -86,6 +95,9 @@ model-index:
|
|
86 |
- type: acc_norm
|
87 |
value: 0.45
|
88 |
name: acc_norm
|
|
|
|
|
|
|
89 |
source:
|
90 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
91 |
name: Open LLM Leaderboard
|
@@ -104,6 +116,9 @@ model-index:
|
|
104 |
- type: acc_norm
|
105 |
value: 0.13
|
106 |
name: acc_norm
|
|
|
|
|
|
|
107 |
source:
|
108 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
109 |
name: Open LLM Leaderboard
|
@@ -124,6 +139,9 @@ model-index:
|
|
124 |
- type: acc
|
125 |
value: 7.17
|
126 |
name: accuracy
|
|
|
|
|
|
|
127 |
source:
|
128 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
129 |
name: Open LLM Leaderboard
|
@@ -179,3 +197,17 @@ Detailed results can be found [here](https://huggingface.co/datasets/open-llm-le
|
|
179 |
|MuSR (0-shot) | 0.13|
|
180 |
|MMLU-PRO (5-shot) | 7.17|
|
181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
- type: inst_level_strict_acc and prompt_level_strict_acc
|
33 |
value: 55.37
|
34 |
name: strict accuracy
|
35 |
+
- type: inst_level_strict_acc and prompt_level_strict_acc
|
36 |
+
value: 55.37
|
37 |
+
name: strict accuracy
|
38 |
source:
|
39 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
40 |
name: Open LLM Leaderboard
|
|
|
53 |
- type: acc_norm
|
54 |
value: 7.17
|
55 |
name: normalized accuracy
|
56 |
+
- type: acc_norm
|
57 |
+
value: 7.17
|
58 |
+
name: normalized accuracy
|
59 |
source:
|
60 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
61 |
name: Open LLM Leaderboard
|
|
|
74 |
- type: exact_match
|
75 |
value: 1.28
|
76 |
name: exact match
|
77 |
+
- type: exact_match
|
78 |
+
value: 1.28
|
79 |
+
name: exact match
|
80 |
source:
|
81 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
82 |
name: Open LLM Leaderboard
|
|
|
95 |
- type: acc_norm
|
96 |
value: 0.45
|
97 |
name: acc_norm
|
98 |
+
- type: acc_norm
|
99 |
+
value: 0.45
|
100 |
+
name: acc_norm
|
101 |
source:
|
102 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
103 |
name: Open LLM Leaderboard
|
|
|
116 |
- type: acc_norm
|
117 |
value: 0.13
|
118 |
name: acc_norm
|
119 |
+
- type: acc_norm
|
120 |
+
value: 0.13
|
121 |
+
name: acc_norm
|
122 |
source:
|
123 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
124 |
name: Open LLM Leaderboard
|
|
|
139 |
- type: acc
|
140 |
value: 7.17
|
141 |
name: accuracy
|
142 |
+
- type: acc
|
143 |
+
value: 7.17
|
144 |
+
name: accuracy
|
145 |
source:
|
146 |
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=meditsolutions/Llama-3.2-SUN-2.4B-v1.0.0
|
147 |
name: Open LLM Leaderboard
|
|
|
197 |
|MuSR (0-shot) | 0.13|
|
198 |
|MMLU-PRO (5-shot) | 7.17|
|
199 |
|
200 |
+
|
201 |
+
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
|
202 |
+
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_meditsolutions__Llama-3.2-SUN-2.5B-chat)
|
203 |
+
|
204 |
+
| Metric |Value|
|
205 |
+
|-------------------|----:|
|
206 |
+
|Avg. |11.93|
|
207 |
+
|IFEval (0-Shot) |55.37|
|
208 |
+
|BBH (3-Shot) | 7.17|
|
209 |
+
|MATH Lvl 5 (4-Shot)| 1.28|
|
210 |
+
|GPQA (0-shot) | 0.45|
|
211 |
+
|MuSR (0-shot) | 0.13|
|
212 |
+
|MMLU-PRO (5-shot) | 7.17|
|
213 |
+
|