benchbench / assets /mybench.csv
Yotam-Perlitz
add example download option
f7f5843
raw
history blame
2.28 kB
model,score,scenario
claude_3_5_sonnet_20240620,61.16,mybench_average
gpt_4o_2024_05_13,54.96,mybench_average
gpt_4_turbo_2024_04_09,53,mybench_average
gpt_4_1106_preview,52.17,mybench_average
claude_3_opus_20240229,50.75,mybench_average
gpt_4_0125_preview,49.39,mybench_average
deepseek_coder_v2,46.79,mybench_average
gemini_1.5_pro_api_0514,44.35,mybench_average
gemma_2_27b_it,41.22,mybench_average
gemini_1.5_flash_api_0514,40.89,mybench_average
qwen2_72b_instruct,40.16,mybench_average
acm_rewrite_qwen2_72b_chat,39.6,mybench_average
mistral_large_2402,38.92,mybench_average
deepseek_chat_v2,38.39,mybench_average
claude_3_sonnet_20240229,38.08,mybench_average
meta_llama_3_70b_instruct,37.38,mybench_average
claude_3_haiku_20240307,35.32,mybench_average
mixtral_8x22b_instruct_v0.1,34.84,mybench_average
gpt_3.5_turbo_0125,34.43,mybench_average
gpt_3.5_turbo_1106,34.14,mybench_average
command_r_plus,32.86,mybench_average
mistral_small_2402,32.8,mybench_average
gemma_2_9b_it,31.57,mybench_average
phi_3_medium_4k_instruct,30.33,mybench_average
phi_3_medium_128k_instruct,29.64,mybench_average
deepseek_coder_v2_lite_instruct,29.15,mybench_average
qwen1.5_110b_chat,28.96,mybench_average
qwen1.5_72b_chat,28.89,mybench_average
command_r,27.23,mybench_average
phi_3_small_128k_instruct,27.19,mybench_average
meta_llama_3_8b_instruct,26.67,mybench_average
qwen2_7b_instruct,26.45,mybench_average
phi_3_small_8k_instruct,26.24,mybench_average
openhermes_2.5_mistral_7b,23.3,mybench_average
mixtral_8x7b_instruct_v0.1,22.5,mybench_average
mistral_7b_instruct_v0.2,19.33,mybench_average
phi_3_mini_4k_instruct,19.27,mybench_average
zephyr_7b_alpha,19.22,mybench_average
phi_3_mini_128k_instruct,18.04,mybench_average
zephyr_7b_beta,17.32,mybench_average
deepseek_v2_lite_chat,17.14,mybench_average
qwen1.5_7b_chat,16.5,mybench_average
starling_lm_7b_beta,16.44,mybench_average
vicuna_7b_v1.5_16k,13.71,mybench_average
vicuna_7b_v1.5,11.73,mybench_average
qwen1.5_4b_chat,11.13,mybench_average
llama_2_7b_chat,10.25,mybench_average
qwen2_1.5b_instruct,9.96,mybench_average
yi_6b_chat,8.79,mybench_average
qwen2_0.5b_instruct,6.78,mybench_average
qwen1.5_1.8b_chat,6.09,mybench_average
qwen1.5_0.5b_chat,5.26,mybench_average