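"""Prompt templates, stopwords, per-model serving configurations (MODELS),
and per-family pretraining dataset lists."""
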
ZERO_SHOT_PROMPT = """A chat between a curious human and an artificial intelligence assistant. |
|
The assistant gives helpful, detailed, and polite answers to the human's questions. |
|
Human: {{ user_message }} |
|
Assistant: """ |
|
|
|
ZERO_SHOT_STOPWORD = "Human:" |
|
|
|
LM_PROMPT = """Give the best continuation of the following text: {{ user_message }}""" |
|
|
|
LLAMA2_PROMPT = """<s>[INST] <<SYS>> |
|
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. |
|
|
|
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. |
|
<</SYS>> |
|
|
|
{{ user_message }} [/INST] """ |
|
|
|
LLAMA2_STOPWORD = "</s>" |
|
|
|
MPT_PROMPT_7B = """<|im_start|>system |
|
- You are a helpful assistant chatbot trained by MosaicML. |
|
- You answer questions. |
|
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. |
|
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|> |
|
<|im_start|>user |
|
{{ user_message }}<|im_end|> |
|
<|im_start|>assistant |
|
""" |
|
|
|
MPT_LM_PROMPT_7B = """<|im_start|>system |
|
- You are a helpful assistant chatbot trained by MosaicML. |
|
- You answer questions. |
|
- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user. |
|
- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|> |
|
<|im_start|>user |
|
{{ user_message }}<|im_end|> |
|
<|im_start|>assistant |
|
""" |
|
|
|
MPT_PROMPT_30B = """<|im_start|>system |
|
A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.<|im_end|> |
|
<|im_start|>user |
|
{{ user_message }}<|im_end|> |
|
<|im_start|>assistant |
|
""" |
|
|
|
MPT_STOPWORD = "<|im_end|>" |
|
|
|
FALCON_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins. |
|
User: {{ user_message }} |
|
Falcon: """ |
|
|
|
FALCON_STOPWORD = "User:" |
|
|
|
ALFRED_PROMPT = """Alfred is a large language model trained by LightOn. Knowledge cutoff: November 2022. Current date: 31 July, 2023 |
|
|
|
User: {{ user_message }} |
|
Alfred: """ |
|
|
|
ALFRED_STOPWORD = "User:" |
|
|
|
VICUNA_PROMPT = """A chat between a curious user and an artificial intelligence assistant. |
|
The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {{ user_message }} ASSISTANT: """ |
|
|
|
VICUNA_STOPWORD = "" |
|
|
|
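# Per-model serving configuration. num_gpus, batch_size, max_total_tokens,
# max_input_length, and max_batch_prefill_tokens appear to mirror Hugging Face
# text-generation-inference launch parameters; chat models (is_chat=True) also
# carry a prompt template and stopword.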
MODELS = { |
|
|
|
|
|
|
|
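    # Llama-2 base models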
"llama-2-70b": { |
|
"name": "llama-2-70b", |
|
"model_name": "NousResearch/llama-2-70b-hf", |
|
"model_path": "NousResearch-llama-2-70b-hf", |
|
"num_gpus": 4, |
|
"batch_size": 2, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 70e9, |
|
"model_family": "llama-2", |
|
}, |
|
"llama-2-13b": { |
|
"name": "llama-2-13b", |
|
"model_name": "NousResearch/llama-2-13b-hf", |
|
"model_path": "NousResearch-llama-2-13b-hf", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 13e9, |
|
"model_family": "llama-2", |
|
}, |
|
"llama-2-7b": { |
|
"name": "llama-2-7b", |
|
"model_name": "NousResearch/llama-2-7b-hf", |
|
"model_path": "NousResearch-llama-2-7b-hf", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 7e9, |
|
"model_family": "llama-2", |
|
}, |
|
|
|
|
|
|
|
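    # Llama-2 chat models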
"llama-2-70b-chat": { |
|
"name": "llama-2-70b-chat", |
|
"model_name": "NousResearch/llama-2-70b-chat-hf", |
|
"model_path": "NousResearch-llama-2-70b-chat-hf", |
|
"num_gpus": 4, |
|
"batch_size": 2, |
|
"is_chat": True, |
|
"prompt": LLAMA2_PROMPT, |
|
"stopword": LLAMA2_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"model_size": 70e9, |
|
"model_family": "llama-2", |
|
}, |
|
"llama-2-13b-chat": { |
|
"name": "llama-2-13b-chat", |
|
"model_name": "NousResearch/llama-2-13b-chat-hf", |
|
"model_path": "NousResearch-llama-2-13b-chat-hf", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": True, |
|
"prompt": LLAMA2_PROMPT, |
|
"stopword": LLAMA2_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"model_size": 13e9, |
|
"model_family": "llama-2", |
|
}, |
|
"llama-2-7b-chat": { |
|
"name": "llama-2-7b-chat", |
|
"model_name": "NousResearch/llama-2-7b-chat-hf", |
|
"model_path": "NousResearch-llama-2-7b-chat-hf", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": True, |
|
"prompt": LLAMA2_PROMPT, |
|
"stopword": LLAMA2_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"model_size": 7e9, |
|
"model_family": "llama-2", |
|
}, |
|
|
|
|
|
|
|
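    # LLaMA v1 base models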
"llama-65b": { |
|
"name": "llama-65b", |
|
"model_name": "huggyllama/llama-65b", |
|
"model_path": "huggyllama-llama-65b", |
|
"num_gpus": 4, |
|
"batch_size": 2, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 65e9, |
|
"model_family": "llama-1", |
|
}, |
|
"llama-30b": { |
|
"name": "llama-30b", |
|
"model_name": "huggyllama/llama-30b", |
|
"model_path": "huggyllama-llama-30b", |
|
"num_gpus": 2, |
|
"batch_size": 2, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 30e9, |
|
"model_family": "llama-1", |
|
}, |
|
"llama-13b": { |
|
"name": "llama-13b", |
|
"model_name": "huggyllama/llama-13b", |
|
"model_path": "huggyllama-llama-13b", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 13e9, |
|
"model_family": "llama-1", |
|
}, |
|
"llama-7b": { |
|
"name": "llama-7b", |
|
"model_name": "huggyllama/llama-7b", |
|
"model_path": "huggyllama-llama-7b", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"to_be_quantized": True, |
|
"to_be_watermarked": True, |
|
"model_size": 7e9, |
|
"model_family": "llama-1", |
|
}, |
|
|
|
|
|
|
|
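    # OPT models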
"opt-66b": { |
|
"name": "opt-66b", |
|
"model_name": "facebook/opt-66b", |
|
"model_path": "facebook-opt-66b", |
|
"num_gpus": 4, |
|
"batch_size": 2, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 1024, |
|
"model_size": 66e9, |
|
"model_family": "opt", |
|
}, |
|
"opt-30b": { |
|
"name": "opt-30b", |
|
"model_name": "facebook/opt-30b", |
|
"model_path": "facebook-opt-30b", |
|
"num_gpus": 4, |
|
"batch_size": 1, |
|
"is_chat": False, |
|
"no_api": True, |
|
"model_size": 30e9, |
|
"model_family": "opt", |
|
}, |
|
"opt-13b": { |
|
"name": "opt-13b", |
|
"model_name": "facebook/opt-13b", |
|
"model_path": "facebook-opt-13b", |
|
"num_gpus": 2, |
|
"batch_size": 1, |
|
"is_chat": False, |
|
"no_api": True, |
|
"model_size": 13e9, |
|
"model_family": "opt", |
|
}, |
|
"opt-6.7b": { |
|
"name": "opt-6.7b", |
|
"model_name": "facebook/opt-6.7b", |
|
"model_path": "facebook-opt-6.7b", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": False, |
|
"no_api": True, |
|
"model_size": 6.7e9, |
|
"model_family": "opt", |
|
}, |
|
"opt-2.7b": { |
|
"name": "opt-2.7b", |
|
"model_name": "facebook/opt-2.7b", |
|
"model_path": "facebook-opt-2.7b", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 2.7e9, |
|
"model_family": "opt", |
|
}, |
|
"opt-1.3b": { |
|
"name": "opt-1.3b", |
|
"model_name": "facebook/opt-1.3b", |
|
"model_path": "facebook-opt-1.3b", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"use_flash_attention": True, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 1.3e9, |
|
"model_family": "opt", |
|
}, |
|
"opt-350m": { |
|
"name": "opt-350m", |
|
"model_name": "facebook/opt-350m", |
|
"model_path": "facebook-opt-350m", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"no_api": True, |
|
"model_size": 350e6, |
|
"model_family": "opt", |
|
}, |
|
"opt-125m": { |
|
"name": "opt-125m", |
|
"model_name": "facebook/opt-125m", |
|
"model_path": "facebook-opt-125m", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 125e6, |
|
"model_family": "opt", |
|
}, |
|
|
|
|
|
|
|
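    # MPT base models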
"mpt-30b": { |
|
"name": "mpt-30b", |
|
"model_name": "mosaicml/mpt-30b", |
|
"model_path": "mosaicml-mpt-30b", |
|
"num_gpus": 2, |
|
"batch_size": 2, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 1024, |
|
"model_size": 30e9, |
|
"model_family": "mpt", |
|
}, |
|
"mpt-7b": { |
|
"name": "mpt-7b", |
|
"model_name": "mosaicml/mpt-7b", |
|
"model_path": "mosaicml-mpt-7b", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 7e9, |
|
"model_family": "mpt", |
|
}, |
|
|
|
|
|
|
|
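    # MPT chat models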
"mpt-30b-chat": { |
|
"name": "mpt-30b-chat", |
|
"model_name": "mosaicml/mpt-30b-chat", |
|
"model_path": "mosaicml-mpt-30b-chat", |
|
"num_gpus": 2, |
|
"batch_size": 2, |
|
"is_chat": True, |
|
"prompt": MPT_PROMPT_30B, |
|
"stopword": MPT_STOPWORD, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 30e9, |
|
"model_family": "mpt", |
|
}, |
|
"mpt-7b-chat": { |
|
"name": "mpt-7b-chat", |
|
"model_name": "mosaicml/mpt-7b-chat", |
|
"model_path": "mosaicml-mpt-7b-chat", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": True, |
|
"prompt": MPT_PROMPT_7B, |
|
"stopword": MPT_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 7e9, |
|
"model_family": "mpt", |
|
}, |
|
|
|
|
|
|
|
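    # OpenLLaMA v1 base models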
"openllama-13b": { |
|
"name": "openllama-13b", |
|
"model_name": "openlm-research/open_llama_13b", |
|
"model_path": "openlm-research-open_llama_13b", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 13e9, |
|
"model_family": "openllama", |
|
}, |
|
"openllama-7b": { |
|
"name": "openllama-7b", |
|
"model_name": "openlm-research/open_llama_7b", |
|
"model_path": "openlm-research-open_llama_7b", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 7e9, |
|
"model_family": "openllama", |
|
}, |
|
"openllama-3b": { |
|
"name": "openllama-3b", |
|
"model_name": "openlm-research/open_llama_3b", |
|
"model_path": "openlm-research-open_llama_3b", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"use_flash_attention": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 3e9, |
|
"model_family": "openllama", |
|
}, |
|
|
|
|
|
|
|
|
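    # OpenLLaMA v2 base models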
"openllama-2-7b": { |
|
"name": "openllama-2-7b", |
|
"model_name": "openlm-research/open_llama_7b_v2", |
|
"model_path": "openlm-research-open_llama_7b_v2", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 7e9, |
|
"model_family": "openllama-2", |
|
}, |
|
"openllama-2-3b": { |
|
"name": "openllama-2-3b", |
|
"model_name": "openlm-research/open_llama_3b_v2", |
|
"model_path": "openlm-research-open_llama_3b_v2", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"use_flash_attention": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 3e9, |
|
"model_family": "openllama-2", |
|
}, |
|
|
|
|
|
|
|
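    # Pythia models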
"pythia-12b": { |
|
"name": "pythia-12b", |
|
"model_name": "EleutherAI/pythia-12b", |
|
"model_path": "EleutherAI-pythia-12b", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 12e9, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-6.9b": { |
|
"name": "pythia-6.9b", |
|
"model_name": "EleutherAI/pythia-6.9b", |
|
"model_path": "EleutherAI-pythia-6.9b", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 6.9e9, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-2.8b": { |
|
"name": "pythia-2.8b", |
|
"model_name": "EleutherAI/pythia-2.8b", |
|
"model_path": "EleutherAI-pythia-2.8b", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 2.8e9, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-1.4b": { |
|
"name": "pythia-1.4b", |
|
"model_name": "EleutherAI/pythia-1.4b", |
|
"model_path": "EleutherAI-pythia-1.4b", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 1.4e9, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-1b": { |
|
"name": "pythia-1b", |
|
"model_name": "EleutherAI/pythia-1b", |
|
"model_path": "EleutherAI-pythia-1b", |
|
"num_gpus": 1, |
|
"batch_size": 1, |
|
"is_chat": False, |
|
"use_flash_attention": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 1e9, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-410m": { |
|
"name": "pythia-410m", |
|
"model_name": "EleutherAI/pythia-410m", |
|
"model_path": "EleutherAI-pythia-410m", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 410e6, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-160m": { |
|
"name": "pythia-160m", |
|
"model_name": "EleutherAI/pythia-160m", |
|
"model_path": "EleutherAI-pythia-160m", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 160e6, |
|
"model_family": "pythia", |
|
}, |
|
"pythia-70m": { |
|
"name": "pythia-70m", |
|
"model_name": "EleutherAI/pythia-70m", |
|
"model_path": "EleutherAI-pythia-70m", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 70e6, |
|
"model_family": "pythia", |
|
}, |
|
|
|
|
|
|
|
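    # Pythia (deduplicated Pile) models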
"pythia-12b-deduped": { |
|
"name": "pythia-12b-deduped", |
|
"model_name": "EleutherAI/pythia-12b-deduped", |
|
"model_path": "EleutherAI-pythia-12b-deduped", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 12e9, |
|
}, |
|
"pythia-6.9b-deduped": { |
|
"name": "pythia-6.9b-deduped", |
|
"model_name": "EleutherAI/pythia-6.9b-deduped", |
|
"model_path": "EleutherAI-pythia-6.9b-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 6.9e9, |
|
}, |
|
"pythia-2.8b-deduped": { |
|
"name": "pythia-2.8b-deduped", |
|
"model_name": "EleutherAI/pythia-2.8b-deduped", |
|
"model_path": "EleutherAI-pythia-2.8b-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 2.8e9, |
|
}, |
|
"pythia-1.4b-deduped": { |
|
"name": "pythia-1.4b-deduped", |
|
"model_name": "EleutherAI/pythia-1.4b-deduped", |
|
"model_path": "EleutherAI-pythia-1.4b-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 1.4e9, |
|
}, |
|
"pythia-1b-deduped": { |
|
"name": "pythia-1b-deduped", |
|
"model_name": "EleutherAI/pythia-1b-deduped", |
|
"model_path": "EleutherAI-pythia-1b-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"use_flash_attention": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 1e9, |
|
}, |
|
"pythia-410m-deduped": { |
|
"name": "pythia-410m-deduped", |
|
"model_name": "EleutherAI/pythia-410m-deduped", |
|
"model_path": "EleutherAI-pythia-410m-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 410e6, |
|
}, |
|
"pythia-160m-deduped": { |
|
"name": "pythia-160m-deduped", |
|
"model_name": "EleutherAI/pythia-160m-deduped", |
|
"model_path": "EleutherAI-pythia-160m-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 160e6, |
|
}, |
|
"pythia-70m-deduped": { |
|
"name": "pythia-70m-deduped", |
|
"model_name": "EleutherAI/pythia-70m-deduped", |
|
"model_path": "EleutherAI-pythia-70m-deduped", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "pythia-deduped", |
|
"model_size": 70e6, |
|
}, |
|
|
|
|
|
|
|
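    # GPT-2 models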
"gpt2-xl": { |
|
"name": "gpt2-xl", |
|
"model_name": "gpt2-xl", |
|
"model_path": "gpt2-xl", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 1.5e9, |
|
"model_family": "gpt2", |
|
}, |
|
"gpt2-large": { |
|
"name": "gpt2-large", |
|
"model_name": "gpt2-large", |
|
"model_path": "gpt2-large", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 774e6, |
|
"model_family": "gpt2", |
|
}, |
|
"gpt2-medium": { |
|
"name": "gpt2-medium", |
|
"model_name": "gpt2-medium", |
|
"model_path": "gpt2-medium", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 355e6, |
|
"model_family": "gpt2", |
|
}, |
|
"gpt2": { |
|
"name": "gpt2", |
|
"model_name": "gpt2", |
|
"model_path": "gpt2", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 124e6, |
|
"model_family": "gpt2", |
|
}, |
|
|
|
|
|
|
|
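    # Cerebras-GPT models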
"cerebras-gpt-13b": { |
|
"name": "cerebras-gpt-13b", |
|
"model_name": "cerebras/Cerebras-GPT-13B", |
|
"model_path": "cerebras-Cerebras-GPT-13B", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "cerebras", |
|
"model_size": 13e9, |
|
}, |
|
"cerebras-gpt-6.7b": { |
|
"name": "cerebras-gpt-6.7b", |
|
"model_name": "cerebras/Cerebras-GPT-6.7B", |
|
"model_path": "cerebras-Cerebras-GPT-6.7B", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "cerebras", |
|
"model_size": 6.7e9, |
|
}, |
|
"cerebras-gpt-2.7b": { |
|
"name": "cerebras-gpt-2.7b", |
|
"model_name": "cerebras/Cerebras-GPT-2.7B", |
|
"model_path": "cerebras-Cerebras-GPT-2.7B", |
|
"num_gpus": 1, |
|
"batch_size": 1, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "cerebras", |
|
"model_size": 2.7e9, |
|
}, |
|
"cerebras-gpt-1.3b": { |
|
"name": "cerebras-gpt-1.3b", |
|
"model_name": "cerebras/Cerebras-GPT-1.3B", |
|
"model_path": "cerebras-Cerebras-GPT-1.3B", |
|
"num_gpus": 1, |
|
"batch_size": 1, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "cerebras", |
|
"model_size": 1.3e9, |
|
}, |
|
"cerebras-gpt-256m": { |
|
"name": "cerebras-gpt-256m", |
|
"model_name": "cerebras/Cerebras-GPT-256M", |
|
"model_path": "cerebras-Cerebras-GPT-256M", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "cerebras", |
|
"model_size": 256e6, |
|
}, |
|
"cerebras-gpt-111m": { |
|
"name": "cerebras-gpt-111m", |
|
"model_name": "cerebras/Cerebras-GPT-111M", |
|
"model_path": "cerebras-Cerebras-GPT-111M", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "cerebras", |
|
"model_size": 111e6, |
|
}, |
|
|
|
|
|
|
|
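    # BLOOM models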
"bloom-7.1b": { |
|
"name": "bloom-7.1b", |
|
"model_name": "bigscience/bloom-7b1", |
|
"model_path": "bigscience-bloom-7b1", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 7.1e9, |
|
"model_family": "bloom", |
|
}, |
|
"bloom-3b": { |
|
"name": "bloom-3b", |
|
"model_name": "bigscience/bloom-3b", |
|
"model_path": "bigscience-bloom-3b", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 3e9, |
|
"model_family": "bloom", |
|
}, |
|
"bloom-1.7b": { |
|
"name": "bloom-1.7b", |
|
"model_name": "bigscience/bloom-1b7", |
|
"model_path": "bigscience-bloom-1b7", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 1.7e9, |
|
"model_family": "bloom", |
|
}, |
|
"bloom-1.1b": { |
|
"name": "bloom-1.1b", |
|
"model_name": "bigscience/bloom-1b1", |
|
"model_path": "bigscience-bloom-1b1", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 1.1e9, |
|
"model_family": "bloom", |
|
}, |
|
"bloom-560m": { |
|
"name": "bloom-560m", |
|
"model_name": "bigscience/bloom-560m", |
|
"model_path": "bigscience-bloom-560m", |
|
"num_gpus": 1, |
|
"batch_size": 16, |
|
"is_chat": False, |
|
"max_total_tokens": 1024, |
|
"max_input_length": 256, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 560e6, |
|
"model_family": "bloom", |
|
}, |
|
|
|
|
|
|
|
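    # Falcon base models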
"falcon-40b": { |
|
"name": "falcon-40b", |
|
"model_name": "tiiuae/falcon-40b", |
|
"model_path": "tiiuae-falcon-40b", |
|
"num_gpus": 4, |
|
"batch_size": 4, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 40e9, |
|
"model_family": "falcon", |
|
}, |
|
"falcon-7b": { |
|
"name": "falcon-7b", |
|
"model_name": "tiiuae/falcon-7b", |
|
"model_path": "tiiuae-falcon-7b", |
|
"num_gpus": 1, |
|
"batch_size": 8, |
|
"is_chat": False, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_size": 7e9, |
|
"model_family": "falcon", |
|
}, |
|
|
|
|
|
|
|
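    # Falcon instruct and Alfred chat models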
"falcon-40b-instruct": { |
|
"name": "falcon-40b-instruct", |
|
"model_name": "tiiuae/falcon-40b-instruct", |
|
"model_path": "tiiuae-falcon-40b-instruct", |
|
"num_gpus": 4, |
|
"batch_size": 4, |
|
"is_chat": True, |
|
"prompt": FALCON_PROMPT, |
|
"stopword": FALCON_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "falcon", |
|
"model_size": 40e9, |
|
}, |
|
"falcon-7b-instruct": { |
|
"name": "falcon-7b-instruct", |
|
"model_name": "tiiuae/falcon-7b-instruct", |
|
"model_path": "tiiuae-falcon-7b-instruct", |
|
"num_gpus": 1, |
|
"batch_size": 5, |
|
"is_chat": True, |
|
"prompt": FALCON_PROMPT, |
|
"stopword": FALCON_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "falcon", |
|
"model_size": 7e9, |
|
}, |
|
"alfred-40b-0723": { |
|
"name": "alfred-40b-0723", |
|
"model_name": "lightonai/alfred-40b-0723", |
|
"model_path": "lightonai-alfred-40b-0723", |
|
"num_gpus": 4, |
|
"batch_size": 4, |
|
"is_chat": True, |
|
"prompt": ALFRED_PROMPT, |
|
"stopword": ALFRED_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "falcon", |
|
"model_size": 40e9, |
|
}, |
|
|
|
|
|
|
|
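    # Vicuna chat models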
"vicuna-33b-v1.3": { |
|
"name": "vicuna-33b-v1.3", |
|
"model_name": "lmsys/vicuna-33b-v1.3", |
|
"model_path": "lmsys-vicuna-33b-v1.3", |
|
"num_gpus": 2, |
|
"batch_size": 2, |
|
"is_chat": True, |
|
"prompt": VICUNA_PROMPT, |
|
"stopword": VICUNA_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "vicuna", |
|
"model_size": 33e9, |
|
}, |
|
"vicuna-13b-v1.3": { |
|
"name": "vicuna-13b-v1.3", |
|
"model_name": "lmsys/vicuna-13b-v1.3", |
|
"model_path": "lmsys-vicuna-13b-v1.3", |
|
"num_gpus": 2, |
|
"batch_size": 8, |
|
"is_chat": True, |
|
"prompt": VICUNA_PROMPT, |
|
"stopword": VICUNA_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "vicuna", |
|
"model_size": 13e9, |
|
}, |
|
"vicuna-7b-v1.3": { |
|
"name": "vicuna-7b-v1.3", |
|
"model_name": "lmsys/vicuna-7b-v1.3", |
|
"model_path": "lmsys-vicuna-7b-v1.3", |
|
"num_gpus": 1, |
|
"batch_size": 4, |
|
"is_chat": True, |
|
"prompt": VICUNA_PROMPT, |
|
"stopword": VICUNA_STOPWORD, |
|
"max_total_tokens": 2048, |
|
"max_input_length": 1024, |
|
"max_batch_prefill_tokens": 4096, |
|
"model_family": "vicuna", |
|
"model_size": 7e9, |
|
}, |
|
} |
|
|
|
|
|
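# Pretraining corpora associated with each model family.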
MODEL_FAMILY_PRETRAINING_DATASETS = { |
|
"llama-2": ["UNK-commoncrawl"], |
|
"llama-1": [ |
|
"llama", |
|
"c4", |
|
"github", |
|
"wikipedia", |
|
"books3", |
|
"gutenberg", |
|
"arxiv", |
|
"stackexchange", |
|
], |
|
"openllama": [ |
|
"redpajama", |
|
"c4", |
|
"github", |
|
"wikipedia", |
|
"books3", |
|
"gutenberg", |
|
"arxiv", |
|
"stackexchange", |
|
], |
|
"openllama-2": [ |
|
"refinedweb", |
|
"github", |
|
"wikipedia", |
|
"books3", |
|
"gutenberg", |
|
"arxiv", |
|
"stackexchange", |
|
], |
|
"pythia": [ |
|
"thepile", |
|
"pubmed", |
|
"books3", |
|
"arxiv", |
|
"github", |
|
"openwebtext2", |
|
"freelaw", |
|
"wikipedia", |
|
"stackexchange", |
|
"uspto", |
|
"gutenberg", |
|
"opensubtitles", |
|
"mathematics", |
|
"bookcorpus2", |
|
"ubuntuIRC", |
|
"europarl", |
|
"philpapers", |
|
"nih-grants" "hackernews", |
|
"enron", |
|
], |
|
"gpt2": ["openwebtext"], |
|
"cerebras": [ |
|
"thepile", |
|
"pubmed", |
|
"books3", |
|
"arxiv", |
|
"github", |
|
"openwebtext2", |
|
"freelaw", |
|
"wikipedia", |
|
"stackexchange", |
|
"uspto", |
|
"gutenberg", |
|
"opensubtitles", |
|
"mathematics", |
|
"bookcorpus2", |
|
"ubuntuIRC", |
|
"europarl", |
|
"philpapers", |
|
"nih-grants" "hackernews", |
|
"enron", |
|
], |
|
"bloom": [ |
|
"oscar", |
|
"github", |
|
"commoncrawl-bloom", |
|
], |
|
"falcon": [ |
|
"refinedweb", |
|
"pubmed", |
|
"books3", |
|
"arxiv", |
|
"github", |
|
"openwebtext2", |
|
"freelaw", |
|
"wikipedia", |
|
"stackexchange", |
|
"uspto", |
|
"gutenberg", |
|
"opensubtitles", |
|
"mathematics", |
|
"bookcorpus2", |
|
"ubuntuIRC", |
|
"europarl", |
|
"philpapers", |
|
"nih-grants" "hackernews", |
|
"enron", |
|
], |
|
"mpt": [ |
|
"c4", |
|
"mc4", |
|
"redpajama", |
|
"github", |
|
"wikipedia", |
|
"books3", |
|
"gutenberg", |
|
"arxiv", |
|
"stackexchange", |
|
], |
|
"opt": [ |
|
"cc-news", |
|
"cc-stories", |
|
"thepile", |
|
"reddit" "pubmed", |
|
"books3", |
|
"github", |
|
"openwebtext2", |
|
"wikipedia", |
|
"uspto", |
|
"gutenberg", |
|
"opensubtitles", |
|
"mathematics", |
|
"bookcorpus2", |
|
"hackernews", |
|
], |
|
} |
|
|
|
|
|
if __name__ == "__main__": |
|
print(len(MODELS)) |
|
print("\n".join(MODELS.keys())) |
|
|