Spaces:
Running
Running
eq bench
Browse files
- benchmark_results.csv +1 -0
- src/about.py +1 -1
benchmark_results.csv
CHANGED
@@ -148,3 +148,4 @@ Qwen/Qwen1.5-72B-Chat,2024-06-20 18:06:58,,Qwen/Qwen1.5-72B-Chat,,,68.03,eq-benc
|
|
148 |
Qwen/Qwen2-72B,2024-06-20 18:36:22,,Qwen/Qwen2-72B,,,69.75,eq-bench_v2_pl,169.0,1,transformers, ,,
|
149 |
Qwen/Qwen2-72B-Instruct,2024-06-20 18:55:02,,Qwen/Qwen2-72B-Instruct,,,72.07,eq-bench_v2_pl,169.0,1,transformers, ,,
|
150 |
mistralai/Mixtral-8x22B-v0.1,2024-06-21 20:20:37,,mistralai/Mixtral-8x22B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,34.0 questions were parseable (min is 83%)
|
|
|
|
148 |
Qwen/Qwen2-72B,2024-06-20 18:36:22,,Qwen/Qwen2-72B,,,69.75,eq-bench_v2_pl,169.0,1,transformers, ,,
|
149 |
Qwen/Qwen2-72B-Instruct,2024-06-20 18:55:02,,Qwen/Qwen2-72B-Instruct,,,72.07,eq-bench_v2_pl,169.0,1,transformers, ,,
|
150 |
mistralai/Mixtral-8x22B-v0.1,2024-06-21 20:20:37,,mistralai/Mixtral-8x22B-v0.1,,,FAILED,eq-bench_pl,FAILED,1,transformers, ,,34.0 questions were parseable (min is 83%)
|
151 |
+
mistralai/Mixtral-8x22B-Instruct-v0.1,2024-06-26 23:40:01,,mistralai/Mixtral-8x22B-Instruct-v0.1,,,67.63,eq-bench_v2_pl,171.0,1,transformers, ,,
|
src/about.py
CHANGED
@@ -20,7 +20,7 @@ AUTHORS = """Authors:
|
|
20 |
* Automatic translation: [Remigiusz Kinas](https://www.linkedin.com/in/remigiusz-kinas/)
|
21 |
* Translation proofreading and localization: [Maria Filipkowska](https://www.linkedin.com/in/maria-filipkowska/), [Zuzanna Dabić](https://www.linkedin.com/in/zuzanna-dabic/)
|
22 |
* Preparing dataset: [Kacper Milan](https://www.linkedin.com/in/kacper-milan/)
|
23 |
-
* Running benchmark: [Krzysztof Wróbel](https://www.linkedin.com/in/wrobelkrzysztof/)
|
24 |
|
25 |
Based on: EQ-Bench: An Emotional Intelligence Benchmark for Large Language Models, Samuel J. Paech, 2023"""
|
26 |
|
|
|
20 |
* Automatic translation: [Remigiusz Kinas](https://www.linkedin.com/in/remigiusz-kinas/)
|
21 |
* Translation proofreading and localization: [Maria Filipkowska](https://www.linkedin.com/in/maria-filipkowska/), [Zuzanna Dabić](https://www.linkedin.com/in/zuzanna-dabic/)
|
22 |
* Preparing dataset: [Kacper Milan](https://www.linkedin.com/in/kacper-milan/)
|
23 |
+
* Running benchmark and leaderboard: [Krzysztof Wróbel](https://www.linkedin.com/in/wrobelkrzysztof/)
|
24 |
|
25 |
Based on: EQ-Bench: An Emotional Intelligence Benchmark for Large Language Models, Samuel J. Paech, 2023"""
|
26 |
|