# LLAMA2
# [INST] <<SYS>>
# {{ system_prompt }}
# <</SYS>>
# {{ user_msg_1 }} [/INST] {{ model_answer_1 }} [INST] {{ user_msg_2 }} [/INST]
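# Illustrative only -- a minimal sketch (not part of the original config) of
# assembling a multi-turn prompt in the format documented above. The helper
# name and the (user_msg, answer) turn structure are assumptions; BOS/EOS
# tokens are left to the tokenizer, as in the comment above.
def build_llama2_chat_prompt(system_prompt, turns):
    """turns: list of (user_msg, model_answer) pairs; the final answer may be None."""
    out = f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n"
    for i, (user_msg, answer) in enumerate(turns):
        if i > 0:
            out += f" [INST] {user_msg}"
        else:
            out += user_msg
        out += " [/INST]"
        if answer is not None:
            out += f" {answer}"
    return out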
ZERO_SHOT_PROMPT = """A chat between a curious human and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the human's questions.
Human: {{ user_message }}
Assistant: """
ZERO_SHOT_STOPWORD = "Human:"
LM_PROMPT = """Give the best continuation of the following text: {{ user_message }}"""
LLAMA2_PROMPT = """[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
{{ user_message }} [/INST] """
LLAMA2_STOPWORD = ""
MPT_PROMPT_7B = """<|im_start|>system
- You are a helpful assistant chatbot trained by MosaicML.
- You answer questions.
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""
# The LM variant of the MPT-7B prompt is currently identical to the chat prompt above.
MPT_LM_PROMPT_7B = MPT_PROMPT_7B
MPT_PROMPT_30B = """<|im_start|>system
A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.<|im_end|>
<|im_start|>user
{{ user_message }}<|im_end|>
<|im_start|>assistant
"""
MPT_STOPWORD = "<|im_end|>"
FALCON_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins.
User: {{ user_message }}
Falcon: """
FALCON_STOPWORD = "User:"
ALFRED_PROMPT = """Alfred is a large language model trained by LightOn. Knowledge cutoff: November 2022. Current date: 31 July, 2023
User: {{ user_message }}
Alfred: """
ALFRED_STOPWORD = "User:"
VICUNA_PROMPT = """A chat between a curious user and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {{ user_message }} ASSISTANT: """
VICUNA_STOPWORD = ""
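# Illustrative only: the templates above use Jinja2-style {{ ... }} placeholders,
# but a plain str.replace suffices for the single user_message slot. This is a
# sketch, not the serving code that actually consumes these templates.
def render_prompt(template, user_message):
    return template.replace("{{ user_message }}", user_message)

# Usage sketch: cut generations at the first stopword (an empty stopword, as for
# the Llama-2 and Vicuna chat models above, means no textual cut-off is applied):
def truncate_at_stopword(generated_text, stopword):
    if stopword and stopword in generated_text:
        return generated_text.split(stopword, 1)[0]
    return generated_text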
MODELS = {
################################################
# llama-2 #
################################################
"llama-2-70b": {
"name": "llama-2-70b",
"model_name": "NousResearch/llama-2-70b-hf",
"model_path": "NousResearch-llama-2-70b-hf",
"num_gpus": 4,
"batch_size": 2,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 70e9,
"model_family": "llama-2",
},
"llama-2-13b": {
"name": "llama-2-13b",
"model_name": "NousResearch/llama-2-13b-hf",
"model_path": "NousResearch-llama-2-13b-hf",
"num_gpus": 2,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 13e9,
"model_family": "llama-2",
},
"llama-2-7b": {
"name": "llama-2-7b",
"model_name": "NousResearch/llama-2-7b-hf",
"model_path": "NousResearch-llama-2-7b-hf",
"num_gpus": 1,
"batch_size": 4,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 7e9,
"model_family": "llama-2",
},
################################################
# llama-2-chat #
################################################
"llama-2-70b-chat": {
"name": "llama-2-70b-chat",
"model_name": "NousResearch/llama-2-70b-chat-hf",
"model_path": "NousResearch-llama-2-70b-chat-hf",
"num_gpus": 4,
"batch_size": 2,
"is_chat": True,
"prompt": LLAMA2_PROMPT,
"stopword": LLAMA2_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"model_size": 70e9,
"model_family": "llama-2",
},
"llama-2-13b-chat": {
"name": "llama-2-13b-chat",
"model_name": "NousResearch/llama-2-13b-chat-hf",
"model_path": "NousResearch-llama-2-13b-chat-hf",
"num_gpus": 2,
"batch_size": 8,
"is_chat": True,
"prompt": LLAMA2_PROMPT,
"stopword": LLAMA2_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"model_size": 13e9,
"model_family": "llama-2",
},
"llama-2-7b-chat": {
"name": "llama-2-7b-chat",
"model_name": "NousResearch/llama-2-7b-chat-hf",
"model_path": "NousResearch-llama-2-7b-chat-hf",
"num_gpus": 1,
"batch_size": 4,
"is_chat": True,
"prompt": LLAMA2_PROMPT,
"stopword": LLAMA2_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"model_size": 7e9,
"model_family": "llama-2",
},
################################################
# llama-1 #
################################################
"llama-65b": {
"name": "llama-65b",
"model_name": "huggyllama/llama-65b",
"model_path": "huggyllama-llama-65b",
"num_gpus": 4,
"batch_size": 2,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 65e9,
"model_family": "llama-1",
},
"llama-30b": {
"name": "llama-30b",
"model_name": "huggyllama/llama-30b",
"model_path": "huggyllama-llama-30b",
"num_gpus": 2,
"batch_size": 2,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 30e9,
"model_family": "llama-1",
},
"llama-13b": {
"name": "llama-13b",
"model_name": "huggyllama/llama-13b",
"model_path": "huggyllama-llama-13b",
"num_gpus": 2,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 13e9,
"model_family": "llama-1",
},
"llama-7b": {
"name": "llama-7b",
"model_name": "huggyllama/llama-7b",
"model_path": "huggyllama-llama-7b",
"num_gpus": 1,
"batch_size": 4,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"to_be_quantized": True,
"to_be_watermarked": True,
"model_size": 7e9,
"model_family": "llama-1",
},
################################################
# OPT #
################################################
"opt-66b": {
"name": "opt-66b",
"model_name": "facebook/opt-66b",
"model_path": "facebook-opt-66b",
"num_gpus": 4,
"batch_size": 2,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 1024,
"model_size": 66e9,
"model_family": "opt",
},
"opt-30b": {
"name": "opt-30b",
"model_name": "facebook/opt-30b",
"model_path": "facebook-opt-30b",
"num_gpus": 4,
"batch_size": 1,
"is_chat": False,
"no_api": True,
"model_size": 30e9,
"model_family": "opt",
},
"opt-13b": {
"name": "opt-13b",
"model_name": "facebook/opt-13b",
"model_path": "facebook-opt-13b",
"num_gpus": 2,
"batch_size": 1,
"is_chat": False,
"no_api": True,
"model_size": 13e9,
"model_family": "opt",
},
"opt-6.7b": {
"name": "opt-6.7b",
"model_name": "facebook/opt-6.7b",
"model_path": "facebook-opt-6.7b",
"num_gpus": 1,
"batch_size": 4,
"is_chat": False,
"no_api": True,
"model_size": 6.7e9,
"model_family": "opt",
},
"opt-2.7b": {
"name": "opt-2.7b",
"model_name": "facebook/opt-2.7b",
"model_path": "facebook-opt-2.7b",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 2.7e9,
"model_family": "opt",
},
"opt-1.3b": {
"name": "opt-1.3b",
"model_name": "facebook/opt-1.3b",
"model_path": "facebook-opt-1.3b",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"use_flash_attention": True,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 1.3e9,
"model_family": "opt",
},
"opt-350m": {
"name": "opt-350m",
"model_name": "facebook/opt-350m",
"model_path": "facebook-opt-350m",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"no_api": True,
"model_size": 350e6,
"model_family": "opt",
},
"opt-125m": {
"name": "opt-125m",
"model_name": "facebook/opt-125m",
"model_path": "facebook-opt-125m",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 125e6,
"model_family": "opt",
},
################################################
# MPT #
################################################
"mpt-30b": {
"name": "mpt-30b",
"model_name": "mosaicml/mpt-30b",
"model_path": "mosaicml-mpt-30b",
"num_gpus": 2,
"batch_size": 2,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 1024,
"model_size": 30e9,
"model_family": "mpt",
},
"mpt-7b": {
"name": "mpt-7b",
"model_name": "mosaicml/mpt-7b",
"model_path": "mosaicml-mpt-7b",
"num_gpus": 1,
"batch_size": 4,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 7e9,
"model_family": "mpt",
},
################################################
# MPT-Chat #
################################################
"mpt-30b-chat": {
"name": "mpt-30b-chat",
"model_name": "mosaicml/mpt-30b-chat",
"model_path": "mosaicml-mpt-30b-chat",
"num_gpus": 2,
"batch_size": 2,
"is_chat": True,
"prompt": MPT_PROMPT_30B,
"stopword": MPT_STOPWORD,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 30e9,
"model_family": "mpt",
},
"mpt-7b-chat": {
"name": "mpt-7b-chat",
"model_name": "mosaicml/mpt-7b-chat",
"model_path": "mosaicml-mpt-7b-chat",
"num_gpus": 1,
"batch_size": 4,
"is_chat": True,
"prompt": MPT_PROMPT_7B,
"stopword": MPT_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 7e9,
"model_family": "mpt",
},
################################################
# OPENLLAMA #
################################################
"openllama-13b": {
"name": "openllama-13b",
"model_name": "openlm-research/open_llama_13b",
"model_path": "openlm-research-open_llama_13b",
"num_gpus": 2,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 13e9,
"model_family": "openllama",
},
"openllama-7b": {
"name": "openllama-7b",
"model_name": "openlm-research/open_llama_7b",
"model_path": "openlm-research-open_llama_7b",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 7e9,
"model_family": "openllama",
},
"openllama-3b": {
"name": "openllama-3b",
"model_name": "openlm-research/open_llama_3b",
"model_path": "openlm-research-open_llama_3b",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"use_flash_attention": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 3e9,
"model_family": "openllama",
},
################################################
# OPENLLAMA-2 #
################################################
# "openllama-2-13b": {
# "name": "openllama-2-13b",
# "model_name": "openlm-research/open_llama_13b_v2",
# "model_path": "openlm-research-open_llama_13b_v2",
# "num_gpus": 2,
# "batch_size": 1,
# "is_chat": False,
# },
"openllama-2-7b": {
"name": "openllama-2-7b",
"model_name": "openlm-research/open_llama_7b_v2",
"model_path": "openlm-research-open_llama_7b_v2",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 7e9,
"model_family": "openllama-2",
},
"openllama-2-3b": {
"name": "openllama-2-3b",
"model_name": "openlm-research/open_llama_3b_v2",
"model_path": "openlm-research-open_llama_3b_v2",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"use_flash_attention": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 3e9,
"model_family": "openllama-2",
},
################################################
# Pythia #
################################################
"pythia-12b": {
"name": "pythia-12b",
"model_name": "EleutherAI/pythia-12b",
"model_path": "EleutherAI-pythia-12b",
"num_gpus": 2,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 12e9,
"model_family": "pythia",
},
"pythia-6.9b": {
"name": "pythia-6.9b",
"model_name": "EleutherAI/pythia-6.9b",
"model_path": "EleutherAI-pythia-6.9b",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 6.9e9,
"model_family": "pythia",
},
"pythia-2.8b": {
"name": "pythia-2.8b",
"model_name": "EleutherAI/pythia-2.8b",
"model_path": "EleutherAI-pythia-2.8b",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 2.8e9,
"model_family": "pythia",
},
"pythia-1.4b": {
"name": "pythia-1.4b",
"model_name": "EleutherAI/pythia-1.4b",
"model_path": "EleutherAI-pythia-1.4b",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 1.4e9,
"model_family": "pythia",
},
"pythia-1b": {
"name": "pythia-1b",
"model_name": "EleutherAI/pythia-1b",
"model_path": "EleutherAI-pythia-1b",
"num_gpus": 1,
"batch_size": 1,
"is_chat": False,
"use_flash_attention": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 1e9,
"model_family": "pythia",
},
"pythia-410m": {
"name": "pythia-410m",
"model_name": "EleutherAI/pythia-410m",
"model_path": "EleutherAI-pythia-410m",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 410e6,
"model_family": "pythia",
},
"pythia-160m": {
"name": "pythia-160m",
"model_name": "EleutherAI/pythia-160m",
"model_path": "EleutherAI-pythia-160m",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 160e6,
"model_family": "pythia",
},
"pythia-70m": {
"name": "pythia-70m",
"model_name": "EleutherAI/pythia-70m",
"model_path": "EleutherAI-pythia-70m",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 70e6,
"model_family": "pythia",
},
################################################
# Pythia-deduped #
################################################
"pythia-12b-deduped": {
"name": "pythia-12b-deduped",
"model_name": "EleutherAI/pythia-12b-deduped",
"model_path": "EleutherAI-pythia-12b-deduped",
"num_gpus": 2,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 12e9,
},
"pythia-6.9b-deduped": {
"name": "pythia-6.9b-deduped",
"model_name": "EleutherAI/pythia-6.9b-deduped",
"model_path": "EleutherAI-pythia-6.9b-deduped",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 6.9e9,
},
"pythia-2.8b-deduped": {
"name": "pythia-2.8b-deduped",
"model_name": "EleutherAI/pythia-2.8b-deduped",
"model_path": "EleutherAI-pythia-2.8b-deduped",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 2.8e9,
},
"pythia-1.4b-deduped": {
"name": "pythia-1.4b-deduped",
"model_name": "EleutherAI/pythia-1.4b-deduped",
"model_path": "EleutherAI-pythia-1.4b-deduped",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 1.4e9,
},
"pythia-1b-deduped": {
"name": "pythia-1b-deduped",
"model_name": "EleutherAI/pythia-1b-deduped",
"model_path": "EleutherAI-pythia-1b-deduped",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"use_flash_attention": False,
"max_total_tokens": 2048,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 1e9,
},
"pythia-410m-deduped": {
"name": "pythia-410m-deduped",
"model_name": "EleutherAI/pythia-410m-deduped",
"model_path": "EleutherAI-pythia-410m-deduped",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 410e6,
},
"pythia-160m-deduped": {
"name": "pythia-160m-deduped",
"model_name": "EleutherAI/pythia-160m-deduped",
"model_path": "EleutherAI-pythia-160m-deduped",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 160e6,
},
"pythia-70m-deduped": {
"name": "pythia-70m-deduped",
"model_name": "EleutherAI/pythia-70m-deduped",
"model_path": "EleutherAI-pythia-70m-deduped",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "pythia-deduped",
"model_size": 70e6,
},
################################################
# GPT2 #
################################################
"gpt2-xl": {
"name": "gpt2-xl",
"model_name": "gpt2-xl",
"model_path": "gpt2-xl",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 1.5e9,
"model_family": "gpt2",
},
"gpt2-large": {
"name": "gpt2-large",
"model_name": "gpt2-large",
"model_path": "gpt2-large",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 774e6,
"model_family": "gpt2",
},
"gpt2-medium": {
"name": "gpt2-medium",
"model_name": "gpt2-medium",
"model_path": "gpt2-medium",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 355e6,
"model_family": "gpt2",
},
"gpt2": {
"name": "gpt2",
"model_name": "gpt2",
"model_path": "gpt2",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 124e6,
"model_family": "gpt2",
},
################################################
# CEREBRAS #
################################################
"cerebras-gpt-13b": { # add 2 gpus but sharded equals to false
"name": "cerebras-gpt-13b",
"model_name": "cerebras/Cerebras-GPT-13B",
"model_path": "cerebras-Cerebras-GPT-13B",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "cerebras",
"model_size": 13e9,
},
"cerebras-gpt-6.7b": {
"name": "cerebras-gpt-6.7b",
"model_name": "cerebras/Cerebras-GPT-6.7B",
"model_path": "cerebras-Cerebras-GPT-6.7B",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_family": "cerebras",
"model_size": 6.7e9,
},
"cerebras-gpt-2.7b": {
"name": "cerebras-gpt-2.7b",
"model_name": "cerebras/Cerebras-GPT-2.7B",
"model_path": "cerebras-Cerebras-GPT-2.7B",
"num_gpus": 1,
"batch_size": 1,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "cerebras",
"model_size": 2.7e9,
},
"cerebras-gpt-1.3b": {
"name": "cerebras-gpt-1.3b",
"model_name": "cerebras/Cerebras-GPT-1.3B",
"model_path": "cerebras-Cerebras-GPT-1.3B",
"num_gpus": 1,
"batch_size": 1,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_family": "cerebras",
"model_size": 1.3e9,
},
"cerebras-gpt-256m": {
"name": "cerebras-gpt-256m",
"model_name": "cerebras/Cerebras-GPT-256M",
"model_path": "cerebras-Cerebras-GPT-256M",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "cerebras",
"model_size": 256e6,
},
"cerebras-gpt-111m": {
"name": "cerebras-gpt-111m",
"model_name": "cerebras/Cerebras-GPT-111M",
"model_path": "cerebras-Cerebras-GPT-111M",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "cerebras",
"model_size": 111e6,
},
################################################
# Bloom #
################################################
"bloom-7.1b": {
"name": "bloom-7.1b",
"model_name": "bigscience/bloom-7b1",
"model_path": "bigscience-bloom-7b1",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 7.1e9,
"model_family": "bloom",
},
"bloom-3b": {
"name": "bloom-3b",
"model_name": "bigscience/bloom-3b",
"model_path": "bigscience-bloom-3b",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 3e9,
"model_family": "bloom",
},
"bloom-1.7b": {
"name": "bloom-1.7b",
"model_name": "bigscience/bloom-1b7",
"model_path": "bigscience-bloom-1b7",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 1.7e9,
"model_family": "bloom",
},
"bloom-1.1b": {
"name": "bloom-1.1b",
"model_name": "bigscience/bloom-1b1",
"model_path": "bigscience-bloom-1b1",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 1.1e9,
"model_family": "bloom",
},
"bloom-560m": {
"name": "bloom-560m",
"model_name": "bigscience/bloom-560m",
"model_path": "bigscience-bloom-560m",
"num_gpus": 1,
"batch_size": 16,
"is_chat": False,
"max_total_tokens": 1024,
"max_input_length": 256,
"max_batch_prefill_tokens": 4096,
"model_size": 560e6,
"model_family": "bloom",
},
################################################
# Falcon #
################################################
"falcon-40b": {
"name": "falcon-40b",
"model_name": "tiiuae/falcon-40b",
"model_path": "tiiuae-falcon-40b",
"num_gpus": 4,
"batch_size": 4,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 40e9,
"model_family": "falcon",
},
"falcon-7b": {
"name": "falcon-7b",
"model_name": "tiiuae/falcon-7b",
"model_path": "tiiuae-falcon-7b",
"num_gpus": 1,
"batch_size": 8,
"is_chat": False,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_size": 7e9,
"model_family": "falcon",
},
################################################
# Falcon-chat #
################################################
"falcon-40b-instruct": {
"name": "falcon-40b-instruct",
"model_name": "tiiuae/falcon-40b-instruct",
"model_path": "tiiuae-falcon-40b-instruct",
"num_gpus": 4,
"batch_size": 4,
"is_chat": True,
"prompt": FALCON_PROMPT,
"stopword": FALCON_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "falcon",
"model_size": 40e9,
},
"falcon-7b-instruct": {
"name": "falcon-7b-instruct",
"model_name": "tiiuae/falcon-7b-instruct",
"model_path": "tiiuae-falcon-7b-instruct",
"num_gpus": 1,
"batch_size": 5,
"is_chat": True,
"prompt": FALCON_PROMPT,
"stopword": FALCON_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "falcon",
"model_size": 7e9,
},
"alfred-40b-0723": {
"name": "alfred-40b-0723",
"model_name": "lightonai/alfred-40b-0723",
"model_path": "lightonai-alfred-40b-0723",
"num_gpus": 4,
"batch_size": 4,
"is_chat": True,
"prompt": ALFRED_PROMPT,
"stopword": ALFRED_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "falcon",
"model_size": 40e9,
},
################################################
# Vicuna v1.3 #
################################################
"vicuna-33b-v1.3": {
"name": "vicuna-33b-v1.3",
"model_name": "lmsys/vicuna-33b-v1.3",
"model_path": "lmsys-vicuna-33b-v1.3",
"num_gpus": 2,
"batch_size": 2,
"is_chat": True,
"prompt": VICUNA_PROMPT,
"stopword": VICUNA_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "vicuna",
"model_size": 33e9,
},
"vicuna-13b-v1.3": {
"name": "vicuna-13b-v1.3",
"model_name": "lmsys/vicuna-13b-v1.3",
"model_path": "lmsys-vicuna-13b-v1.3",
"num_gpus": 2,
"batch_size": 8,
"is_chat": True,
"prompt": VICUNA_PROMPT,
"stopword": VICUNA_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "vicuna",
"model_size": 13e9,
},
"vicuna-7b-v1.3": {
"name": "vicuna-7b-v1.3",
"model_name": "lmsys/vicuna-7b-v1.3",
"model_path": "lmsys-vicuna-7b-v1.3",
"num_gpus": 1,
"batch_size": 4,
"is_chat": True,
"prompt": VICUNA_PROMPT,
"stopword": VICUNA_STOPWORD,
"max_total_tokens": 2048,
"max_input_length": 1024,
"max_batch_prefill_tokens": 4096,
"model_family": "vicuna",
"model_size": 7e9,
},
}
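# Illustrative accessor sketch (not part of the original config). Falling back to
# ZERO_SHOT_PROMPT for base (non-chat) models is an assumption: the dict above
# only attaches a prompt/stopword pair to chat models.
def get_prompt_and_stopword(model_key):
    cfg = MODELS[model_key]
    if cfg["is_chat"]:
        return cfg["prompt"], cfg["stopword"]
    return ZERO_SHOT_PROMPT, ZERO_SHOT_STOPWORD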
MODEL_FAMILY_PRETRAINING_DATASETS = {
"llama-2": ["UNK-commoncrawl"],
"llama-1": [
"llama",
"c4",
"github",
"wikipedia",
"books3",
"gutenberg",
"arxiv",
"stackexchange",
],
"openllama": [
"redpajama",
"c4",
"github",
"wikipedia",
"books3",
"gutenberg",
"arxiv",
"stackexchange",
],
"openllama-2": [
"refinedweb",
"github",
"wikipedia",
"books3",
"gutenberg",
"arxiv",
"stackexchange",
],
"pythia": [
"thepile",
"pubmed",
"books3",
"arxiv",
"github",
"openwebtext2",
"freelaw",
"wikipedia",
"stackexchange",
"uspto",
"gutenberg",
"opensubtitles",
"mathematics",
"bookcorpus2",
"ubuntuIRC",
"europarl",
"philpapers",
"nih-grants" "hackernews",
"enron",
],
"gpt2": ["openwebtext"],
"cerebras": [
"thepile",
"pubmed",
"books3",
"arxiv",
"github",
"openwebtext2",
"freelaw",
"wikipedia",
"stackexchange",
"uspto",
"gutenberg",
"opensubtitles",
"mathematics",
"bookcorpus2",
"ubuntuIRC",
"europarl",
"philpapers",
"nih-grants" "hackernews",
"enron",
],
"bloom": [
"oscar",
"github",
"commoncrawl-bloom",
],
"falcon": [
"refinedweb",
"pubmed",
"books3",
"arxiv",
"github",
"openwebtext2",
"freelaw",
"wikipedia",
"stackexchange",
"uspto",
"gutenberg",
"opensubtitles",
"mathematics",
"bookcorpus2",
"ubuntuIRC",
"europarl",
"philpapers",
"nih-grants" "hackernews",
"enron",
],
"mpt": [
"c4",
"mc4",
"redpajama",
"github",
"wikipedia",
"books3",
"gutenberg",
"arxiv",
"stackexchange",
],
"opt": [
"cc-news",
"cc-stories",
"thepile",
"reddit" "pubmed",
"books3",
"github",
"openwebtext2",
"wikipedia",
"uspto",
"gutenberg",
"opensubtitles",
"mathematics",
"bookcorpus2",
"hackernews",
],
}
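# Illustrative sanity check (not part of the original config): every model_family
# referenced in MODELS should have a dataset entry above. As the file stands,
# this reports "pythia-deduped" and "vicuna", which have no entry.
def families_missing_datasets():
    families = {cfg["model_family"] for cfg in MODELS.values()}
    return sorted(families - set(MODEL_FAMILY_PRETRAINING_DATASETS))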
if __name__ == "__main__":
    print(len(MODELS))
    print("\n".join(MODELS.keys()))