benchbench / assets /mybench_240901.csv
Yotam-Perlitz
replace mybench
32f9aec
raw
history blame
609 Bytes
model,agentbench
gpt-4-0613,4.01
claude-2,2.49
claude-v1.3,2.44
gpt-3.5-turbo-0613,2.32
text-davinci-003,1.71
claude-instant-v1.1,1.60
chat-bison-001,1.39
text-davinci-002,1.25
llama-2-70b-chat,0.78
guanaco-65b,0.54
codellama-34b-instruct,0.96
vicuna-33b-v1.3,0.73
wizardlm-30b-v1.0,0.46
guanaco-33b,0.39
vicuna-13b-v1.5,0.93
llama-2-13b-chat,0.77
openchat-13b-v3.2,0.70
wizardlm-13b-v1.2,0.66
vicuna-7b-v1.5,0.56
codellama-13b-instruct,0.56
codellama-7b-instruct,0.50
koala-13b,0.34
llama-2-7b-chat,0.34
codegeex2-6b,0.27
dolly-12b-v2,0.14
chatglm-6b-v1.1,0.11
oasst-12b-sft-4,0.03