# Heading text for the leaderboard page. The surrounding blank lines are part
# of the value.
TITLE = "\n\n🏆 CLEM Leaderboard\n\n"

# Markdown blurb describing the leaderboard, with links to the paper and the
# repository. The leading and trailing single spaces are part of the value.
INTRODUCTION_TEXT = (
    " The CLEM Leaderboard aims to track, rank and evaluate current cLLMs"
    " (chat-optimized Large Language Models) with the suggested"
    " pronunciation “clems”."
    " The benchmarking approach is described in"
    " [Clembench: Using Game Play to Evaluate Chat-Optimized Language Models"
    " as Conversational Agents](https://arxiv.org/abs/2305.13455)."
    " [Github repo](https://github.com/clembench/clembench) "
)

# Maps full model identifiers to abbreviated labels.
#
# NOTE(review): insertion order is kept exactly as in the original. If a
# consumer applies these entries as sequential substring replacements, longer
# keys (e.g. "gpt-4-0613") must stay ahead of their prefixes (e.g. "gpt-4") --
# confirm against the caller before re-sorting.
SHORT_NAMES = {
    # Presumably removes a temperature suffix from model names -- TODO confirm.
    "t0.0": "",
    "claude-v1.3": "cl-1.3",
    "claude-2": "cl-2",
    "claude-2.1": "cl-2.1",
    "claude-instant-1.2": "cl-ins-1.2",
    "gpt-3.5-turbo-0613": "3.5-0613",
    "gpt-3.5-turbo-1106": "3.5-1106",
    "gpt-4-0613": "4-0613",
    "gpt-4-1106-preview": "4-1106",
    "gpt-4-0314": "4-0314",
    "gpt-4": "4",
    "text-davinci-003": "3",
    "luminous-supreme": "lm",
    "koala-13b": "ko",
    "falcon-40b": "flc",
    "falcon-7b-instruct": "fal-7b",
    "falcon-40b-instruct": "flc-i-40b",
    "oasst-12b": "oas-12b",
    "oasst-sft-4-pythia-12b-epoch-3.5": "ost-12b",
    "vicuna-13b": "vic-13b",
    "vicuna-33b-v1.3": "vic-33b",
    "sheep-duck-llama-2-70b-v1.1": "sd-l2-70b",
    "sheep-duck-llama-2-13b": "sd-l2-13b",
    "WizardLM-70b-v1.0": "w-70b",
    "CodeLlama-34b-Instruct-hf": "cl-34b",
    "command": "com",
    "Mistral-7B-Instruct-v0.1": "m-i-7b",
    "Wizard-Vicuna-13B-Uncensored-HF": "vcn-13b",
    "llama-2-13b-chat-hf": "l2-13b",
    "llama-2-70b-chat-hf": "l2-70b",
    "llama-2-7b-chat-hf": "l2-7b",
    "koala-13B-HF": "k-13b",
}