Spaces:
Running
Running
{ | |
"FacebookAI/xlm-roberta-base": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/FacebookAI/xlm-roberta-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">xlm-roberta-base</a>", | |
"organization": "Facebook", | |
"vocab_size": 250002, | |
"num(digit)": 2728, | |
"len(digit)": "1,3,9", | |
"num(space)": 1, | |
"len(space)": "1,1,1", | |
"num(ar)": 14644, | |
"len(ar)": "1,4,16", | |
"num(zh)": 18457, | |
"len(zh)": "1,2,16", | |
"num(ja)": 20572, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 3434, | |
"len(ja-kana)": "1,3,12", | |
"num(ko)": 5373, | |
"len(ko)": "1,2,8" | |
}, | |
"clue/roberta_chinese_clue_tiny": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/clue/roberta_chinese_clue_tiny\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">roberta-chinese-clue</a>", | |
"organization": "CLUE", | |
"vocab_size": 8021, | |
"num(digit)": 230, | |
"len(digit)": "1,4,10", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 30, | |
"len(ar)": "1,2,3", | |
"num(zh)": 5689, | |
"len(zh)": "1,1,1", | |
"num(ja)": 5691, | |
"len(ja)": "1,1,3", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"dbmdz/bert-base-german-uncased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/dbmdz/bert-base-german-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-german-uncased</a>", | |
"organization": "dbmdz", | |
"vocab_size": 31102, | |
"num(digit)": 1733, | |
"len(digit)": "1,4,12", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google-bert/bert-base-cased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-cased</a>", | |
"organization": "Google", | |
"vocab_size": 28996, | |
"num(digit)": 926, | |
"len(digit)": "1,4,11", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 94, | |
"len(ar)": "1,3,4", | |
"num(zh)": 226, | |
"len(zh)": "1,2,3", | |
"num(ja)": 390, | |
"len(ja)": "1,2,3", | |
"num(ja-kana)": 164, | |
"len(ja-kana)": "1,2,3", | |
"num(ko)": 10, | |
"len(ko)": "1,2,3" | |
}, | |
"google-bert/bert-base-chinese": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-chinese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-chinese</a>", | |
"organization": "Google", | |
"vocab_size": 21128, | |
"num(digit)": 1451, | |
"len(digit)": "1,3,12", | |
"num(space)": 2, | |
"len(space)": "1,2,3", | |
"num(ar)": 30, | |
"len(ar)": "1,2,3", | |
"num(zh)": 14642, | |
"len(zh)": "1,2,3", | |
"num(ja)": 15197, | |
"len(ja)": "1,3,15", | |
"num(ja-kana)": 553, | |
"len(ja-kana)": "1,3,15", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google-bert/bert-base-german-cased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-german-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-german-cased</a>", | |
"organization": "Google", | |
"vocab_size": 30000, | |
"num(digit)": 4065, | |
"len(digit)": "1,11,22", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google-bert/bert-base-multilingual-cased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-multilingual-cased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-multilingual-cased</a>", | |
"organization": "Google", | |
"vocab_size": 119547, | |
"num(digit)": 2583, | |
"len(digit)": "1,3,13", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 4873, | |
"len(ar)": "1,5,14", | |
"num(zh)": 13542, | |
"len(zh)": "1,2,3", | |
"num(ja)": 14880, | |
"len(ja)": "1,3,10", | |
"num(ja-kana)": 1336, | |
"len(ja-kana)": "1,4,10", | |
"num(ko)": 3271, | |
"len(ko)": "1,3,6" | |
}, | |
"google-bert/bert-base-multilingual-uncased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-multilingual-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-multilingual-uncased</a>", | |
"organization": "Google", | |
"vocab_size": 105879, | |
"num(digit)": 2510, | |
"len(digit)": "1,3,13", | |
"num(space)": 2, | |
"len(space)": "1,2,3", | |
"num(ar)": 4530, | |
"len(ar)": "1,5,13", | |
"num(zh)": 16658, | |
"len(zh)": "1,2,3", | |
"num(ja)": 17858, | |
"len(ja)": "1,3,10", | |
"num(ja-kana)": 1188, | |
"len(ja-kana)": "1,4,10", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google-bert/bert-base-uncased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-bert/bert-base-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-uncased</a>", | |
"organization": "Google", | |
"vocab_size": 30522, | |
"num(digit)": 2056, | |
"len(digit)": "1,4,11", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 88, | |
"len(ar)": "1,3,5", | |
"num(zh)": 488, | |
"len(zh)": "1,2,3", | |
"num(ja)": 676, | |
"len(ja)": "1,2,3", | |
"num(ja-kana)": 188, | |
"len(ja-kana)": "1,2,3", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google/mobilebert-uncased": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/mobilebert-uncased\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mobilebert-uncased</a>", | |
"organization": "Google", | |
"vocab_size": 30522, | |
"num(digit)": 2056, | |
"len(digit)": "1,4,11", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 88, | |
"len(ar)": "1,3,5", | |
"num(zh)": 488, | |
"len(zh)": "1,2,3", | |
"num(ja)": 676, | |
"len(ja)": "1,2,3", | |
"num(ja-kana)": 188, | |
"len(ja-kana)": "1,2,3", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"tohoku-nlp/bert-base-japanese": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tohoku-nlp/bert-base-japanese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-japanese</a>", | |
"organization": "Tohoku", | |
"vocab_size": 32000, | |
"num(digit)": 669, | |
"len(digit)": "1,3,5", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 10, | |
"len(ar)": "1,3,3", | |
"num(zh)": 18792, | |
"len(zh)": "1,2,11", | |
"num(ja)": 28367, | |
"len(ja)": "1,2,13", | |
"num(ja-kana)": 12359, | |
"len(ja-kana)": "1,4,13", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"gpt-4": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4</a>", | |
"organization": "OpenAI", | |
"vocab_size": 100277, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 47472, | |
"len(space)": "1,7,128", | |
"num(ar)": 113, | |
"len(ar)": "1,2,10", | |
"num(zh)": 868, | |
"len(zh)": "1,1,7", | |
"num(ja)": 1035, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 169, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 299, | |
"len(ko)": "1,2,4" | |
}, | |
"llama3": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>", | |
"organization": "Meta", | |
"vocab_size": 128256, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 60860, | |
"len(space)": "1,6,128", | |
"num(ar)": 3810, | |
"len(ar)": "1,4,11", | |
"num(zh)": 4424, | |
"len(zh)": "1,1,7", | |
"num(ja)": 5387, | |
"len(ja)": "1,2,8", | |
"num(ja-kana)": 1086, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 2281, | |
"len(ko)": "1,2,6" | |
}, | |
"google-t5/t5-large": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google-t5/t5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">t5</a>", | |
"organization": "Google", | |
"vocab_size": 32100, | |
"num(digit)": 1133, | |
"len(digit)": "1,3,13", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google/byt5-small": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/byt5-small\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">byt5-small</a>", | |
"organization": "Google", | |
"vocab_size": 384, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 10, | |
"len(space)": "1,1,1", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google/mt5-large": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/mt5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">mt5-large</a>", | |
"organization": "Google", | |
"vocab_size": 250100, | |
"num(digit)": 16829, | |
"len(digit)": "1,4,16", | |
"num(space)": 1, | |
"len(space)": "1,1,1", | |
"num(ar)": 7459, | |
"len(ar)": "1,3,16", | |
"num(zh)": 21489, | |
"len(zh)": "1,2,16", | |
"num(ja)": 27078, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 9160, | |
"len(ja-kana)": "1,3,14", | |
"num(ko)": 4041, | |
"len(ko)": "1,1,10" | |
}, | |
"lmsys/fastchat-t5-3b-v1.0": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/lmsys/fastchat-t5-3b-v1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">fastchat-t5-3b-v1.0</a>", | |
"organization": "LMSYS", | |
"vocab_size": 32110, | |
"num(digit)": 1033, | |
"len(digit)": "1,3,8", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"paust/pko-t5-large": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/paust/pko-t5-large\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">pko-t5-large</a>", | |
"organization": "PAUST", | |
"vocab_size": 50358, | |
"num(digit)": 51, | |
"len(digit)": "1,2,3", | |
"num(space)": 10, | |
"len(space)": "1,1,1", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 49050, | |
"len(ko)": "1,2,16" | |
}, | |
"bloom": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/bigscience/bloom\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bloom</a>", | |
"organization": "BigScience", | |
"vocab_size": 250680, | |
"num(digit)": 6629, | |
"len(digit)": "1,4,50", | |
"num(space)": 140180, | |
"len(space)": "1,6,600", | |
"num(ar)": 20854, | |
"len(ar)": "1,5,16", | |
"num(zh)": 30603, | |
"len(zh)": "1,2,23", | |
"num(ja)": 30816, | |
"len(ja)": "1,2,23", | |
"num(ja-kana)": 214, | |
"len(ja-kana)": "1,1,3", | |
"num(ko)": 338, | |
"len(ko)": "1,1,3" | |
}, | |
"llama": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/huggyllama/llama-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama</a>", | |
"organization": "Meta", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"ClueAI/ChatYuan-large-v2": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClueAI/ChatYuan-large-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">ChatYuan-large-v2</a>", | |
"organization": "CLUE", | |
"vocab_size": 32128, | |
"num(digit)": 740, | |
"len(digit)": "1,3,9", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 2, | |
"len(ar)": "1,1,1", | |
"num(zh)": 29591, | |
"len(zh)": "1,2,16", | |
"num(ja)": 29736, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 145, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"Meta/llama3": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>", | |
"organization": "Meta", | |
"vocab_size": 128256, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 60860, | |
"len(space)": "1,6,128", | |
"num(ar)": 3810, | |
"len(ar)": "1,4,11", | |
"num(zh)": 4424, | |
"len(zh)": "1,1,7", | |
"num(ja)": 5387, | |
"len(ja)": "1,2,8", | |
"num(ja-kana)": 1086, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 2281, | |
"len(ko)": "1,2,6" | |
}, | |
"openai/gpt-4": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4</a>", | |
"organization": "OpenAI", | |
"vocab_size": 100277, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 47472, | |
"len(space)": "1,7,128", | |
"num(ar)": 113, | |
"len(ar)": "1,2,10", | |
"num(zh)": 868, | |
"len(zh)": "1,1,7", | |
"num(ja)": 1035, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 169, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 299, | |
"len(ko)": "1,2,4" | |
}, | |
"gradientai/Llama-3-8B-Instruct-Gradient-1048k": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/gradientai/Llama-3-8B-Instruct-Gradient-1048k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3</a>", | |
"organization": "Meta", | |
"vocab_size": 128256, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 60860, | |
"len(space)": "1,6,128", | |
"num(ar)": 3810, | |
"len(ar)": "1,4,11", | |
"num(zh)": 4424, | |
"len(zh)": "1,1,7", | |
"num(ja)": 5387, | |
"len(ja)": "1,2,8", | |
"num(ja-kana)": 1086, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 2281, | |
"len(ko)": "1,2,6" | |
}, | |
"bigscience/bloom": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/bigscience/bloom\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bloom</a>", | |
"organization": "BigScience", | |
"vocab_size": 250680, | |
"num(digit)": 6629, | |
"len(digit)": "1,4,50", | |
"num(space)": 140180, | |
"len(space)": "1,6,600", | |
"num(ar)": 20854, | |
"len(ar)": "1,5,16", | |
"num(zh)": 30603, | |
"len(zh)": "1,2,23", | |
"num(ja)": 30816, | |
"len(ja)": "1,2,23", | |
"num(ja-kana)": 214, | |
"len(ja-kana)": "1,1,3", | |
"num(ko)": 338, | |
"len(ko)": "1,1,3" | |
}, | |
"huggyllama/llama-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/huggyllama/llama-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama</a>", | |
"organization": "Meta", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"baichuan-inc/Baichuan-7B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/baichuan-inc/Baichuan-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">baichuan</a>", | |
"organization": "Baichuan", | |
"vocab_size": 64000, | |
"num(digit)": 335, | |
"len(digit)": "1,14,14", | |
"num(space)": 13, | |
"len(space)": "1,1,1", | |
"num(ar)": 299, | |
"len(ar)": "1,1,2", | |
"num(zh)": 27676, | |
"len(zh)": "1,1,9", | |
"num(ja)": 28522, | |
"len(ja)": "1,1,9", | |
"num(ja-kana)": 178, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 1591, | |
"len(ko)": "1,1,1" | |
}, | |
"01-ai/Yi-34B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-34B</a>", | |
"organization": "Yi", | |
"vocab_size": 64000, | |
"num(digit)": 200, | |
"len(digit)": "1,13,15", | |
"num(space)": 24274, | |
"len(space)": "1,7,16", | |
"num(ar)": 18, | |
"len(ar)": "1,1,4", | |
"num(zh)": 21356, | |
"len(zh)": "1,2,12", | |
"num(ja)": 21407, | |
"len(ja)": "1,2,12", | |
"num(ja-kana)": 51, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 28, | |
"len(ko)": "1,1,2" | |
}, | |
"01-ai/Yi-6B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-6B</a>", | |
"organization": "Yi", | |
"vocab_size": 64000, | |
"num(digit)": 200, | |
"len(digit)": "1,13,15", | |
"num(space)": 24274, | |
"len(space)": "1,7,16", | |
"num(ar)": 18, | |
"len(ar)": "1,1,4", | |
"num(zh)": 21356, | |
"len(zh)": "1,2,12", | |
"num(ja)": 21407, | |
"len(ja)": "1,2,12", | |
"num(ja-kana)": 51, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 28, | |
"len(ko)": "1,1,2" | |
}, | |
"01-ai/Yi-VL-34B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-VL-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-VL-34B</a>", | |
"organization": "Yi", | |
"vocab_size": 64000, | |
"num(digit)": 200, | |
"len(digit)": "1,13,15", | |
"num(space)": 43, | |
"len(space)": "1,2,15", | |
"num(ar)": 18, | |
"len(ar)": "1,1,4", | |
"num(zh)": 21356, | |
"len(zh)": "1,2,12", | |
"num(ja)": 21407, | |
"len(ja)": "1,2,12", | |
"num(ja-kana)": 51, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 28, | |
"len(ko)": "1,1,2" | |
}, | |
"ClassCat/gpt2-base-french": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClassCat/gpt2-base-french\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-french</a>", | |
"organization": "ClassCat", | |
"vocab_size": 50000, | |
"num(digit)": 1833, | |
"len(digit)": "1,4,5", | |
"num(space)": 31889, | |
"len(space)": "1,7,32", | |
"num(ar)": 41, | |
"len(ar)": "1,1,4", | |
"num(zh)": 27, | |
"len(zh)": "1,1,1", | |
"num(ja)": 46, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 19, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"ClassCat/gpt2-base-spanish": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClassCat/gpt2-base-spanish\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-spanish</a>", | |
"organization": "ClassCat", | |
"vocab_size": 50000, | |
"num(digit)": 1492, | |
"len(digit)": "1,4,9", | |
"num(space)": 34496, | |
"len(space)": "1,8,32", | |
"num(ar)": 36, | |
"len(ar)": "1,1,4", | |
"num(zh)": 13, | |
"len(zh)": "1,1,1", | |
"num(ja)": 36, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 23, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"ClueAI/PromptCLUE-base": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ClueAI/PromptCLUE-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">PromptCLUE-base</a>", | |
"organization": "CLUE", | |
"vocab_size": 32128, | |
"num(digit)": 740, | |
"len(digit)": "1,3,9", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 2, | |
"len(ar)": "1,1,1", | |
"num(zh)": 29591, | |
"len(zh)": "1,2,16", | |
"num(ja)": 29736, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 145, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"CohereForAI/aya-101": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/CohereForAI/aya-101\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">aya-101</a>", | |
"organization": "Cohere For AI", | |
"vocab_size": 250100, | |
"num(digit)": 16829, | |
"len(digit)": "1,4,16", | |
"num(space)": 1, | |
"len(space)": "1,1,1", | |
"num(ar)": 7459, | |
"len(ar)": "1,3,16", | |
"num(zh)": 21489, | |
"len(zh)": "1,2,16", | |
"num(ja)": 27078, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 9160, | |
"len(ja-kana)": "1,3,14", | |
"num(ko)": 4041, | |
"len(ko)": "1,1,10" | |
}, | |
"EleutherAI/gpt-neox-20b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/EleutherAI/gpt-neox-20b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-neox-20b</a>", | |
"organization": "EleutherAI", | |
"vocab_size": 50277, | |
"num(digit)": 2036, | |
"len(digit)": "1,3,35", | |
"num(space)": 28996, | |
"len(space)": "1,7,512", | |
"num(ar)": 94, | |
"len(ar)": "1,2,4", | |
"num(zh)": 313, | |
"len(zh)": "1,1,2", | |
"num(ja)": 480, | |
"len(ja)": "1,1,4", | |
"num(ja-kana)": 167, | |
"len(ja-kana)": "1,1,4", | |
"num(ko)": 25, | |
"len(ko)": "1,1,2" | |
}, | |
"HuggingFaceH4/starchat-alpha": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/HuggingFaceH4/starchat-alpha\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">starchat-alpha</a>", | |
"organization": "-", | |
"vocab_size": 49156, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 16515, | |
"len(space)": "1,6,256", | |
"num(ar)": 84, | |
"len(ar)": "1,2,4", | |
"num(zh)": 2030, | |
"len(zh)": "1,1,7", | |
"num(ja)": 2368, | |
"len(ja)": "1,1,8", | |
"num(ja-kana)": 360, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 491, | |
"len(ko)": "1,2,5" | |
}, | |
"HuggingFaceH4/zephyr-7b-beta": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/HuggingFaceH4/zephyr-7b-beta\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">zephyr-7b-beta</a>", | |
"organization": "HuggingFace", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 85, | |
"len(space)": "1,3,15", | |
"num(ar)": 71, | |
"len(ar)": "1,1,2", | |
"num(zh)": 1459, | |
"len(zh)": "1,1,2", | |
"num(ja)": 1593, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 134, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 346, | |
"len(ko)": "1,1,1" | |
}, | |
"LLM360/CrystalCoder": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/LLM360/CrystalCoder\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CrystalCoder</a>", | |
"organization": "MBZUAI", | |
"vocab_size": 32022, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"NousResearch/Llama-2-7b-chat-hf": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/NousResearch/Llama-2-7b-chat-hf\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama2</a>", | |
"organization": "Meta", | |
"vocab_size": 32001, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"OrionStarAI/Orion-14B-Chat": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/OrionStarAI/Orion-14B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Orion-14B-Chat</a>", | |
"organization": "OrionStar", | |
"vocab_size": 84608, | |
"num(digit)": 1559, | |
"len(digit)": "1,4,14", | |
"num(space)": 18383, | |
"len(space)": "1,6,16", | |
"num(ar)": 102, | |
"len(ar)": "1,1,1", | |
"num(zh)": 46998, | |
"len(zh)": "1,2,16", | |
"num(ja)": 49644, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 2987, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 5110, | |
"len(ko)": "1,2,7" | |
}, | |
"Qwen/Qwen-7B-Chat": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen</a>", | |
"organization": "Alibaba", | |
"vocab_size": 151851, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 55883, | |
"len(space)": "1,6,128", | |
"num(ar)": 4018, | |
"len(ar)": "1,3,12", | |
"num(zh)": 25557, | |
"len(zh)": "1,2,7", | |
"num(ja)": 27206, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 2089, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 3495, | |
"len(ko)": "1,1,5" | |
}, | |
"Qwen/Qwen1.5-14B-Chat": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-14B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5</a>", | |
"organization": "Alibaba", | |
"vocab_size": 151646, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 55883, | |
"len(space)": "1,6,128", | |
"num(ar)": 4018, | |
"len(ar)": "1,3,12", | |
"num(zh)": 25557, | |
"len(zh)": "1,2,7", | |
"num(ja)": 27206, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 2089, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 3495, | |
"len(ko)": "1,1,5" | |
}, | |
"Skywork/Skywork-13B-Math": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-13B-Math\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork-13B-Math</a>", | |
"organization": "Kunlun", | |
"vocab_size": 65519, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 62, | |
"len(space)": "1,2,15", | |
"num(ar)": 56, | |
"len(ar)": "1,1,2", | |
"num(zh)": 33913, | |
"len(zh)": "1,2,5", | |
"num(ja)": 34064, | |
"len(ja)": "1,2,5", | |
"num(ja-kana)": 150, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"Skywork/Skywork-13B-base": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Skywork/Skywork-13B-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Skywork-13B-base</a>", | |
"organization": "Kunlun", | |
"vocab_size": 65519, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 62, | |
"len(space)": "1,2,15", | |
"num(ar)": 56, | |
"len(ar)": "1,1,2", | |
"num(zh)": 33913, | |
"len(zh)": "1,2,5", | |
"num(ja)": 34064, | |
"len(ja)": "1,2,5", | |
"num(ja-kana)": 150, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"THUDM/chatglm-6b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm-6b</a>", | |
"organization": "Tsinghua", | |
"vocab_size": 130344, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 93, | |
"len(space)": "1,34,80", | |
"num(ar)": 137, | |
"len(ar)": "1,2,4", | |
"num(zh)": 61358, | |
"len(zh)": "1,2,16", | |
"num(ja)": 61784, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 439, | |
"len(ja-kana)": "1,2,5", | |
"num(ko)": 114, | |
"len(ko)": "1,1,3" | |
}, | |
"THUDM/chatglm2-6b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm2-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm2-6b</a>", | |
"organization": "Tsinghua", | |
"vocab_size": 64787, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 67, | |
"len(space)": "1,2,15", | |
"num(ar)": 57, | |
"len(ar)": "1,1,2", | |
"num(zh)": 30922, | |
"len(zh)": "1,2,16", | |
"num(ja)": 31065, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 143, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 604, | |
"len(ko)": "1,1,1" | |
}, | |
"THUDM/chatglm3-6b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/THUDM/chatglm3-6b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chatglm3-6b</a>", | |
"organization": "Tsinghua", | |
"vocab_size": 64796, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 67, | |
"len(space)": "1,2,15", | |
"num(ar)": 57, | |
"len(ar)": "1,1,2", | |
"num(zh)": 30922, | |
"len(zh)": "1,2,16", | |
"num(ja)": 31065, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 143, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 604, | |
"len(ko)": "1,1,1" | |
}, | |
"TigerResearch/tigerbot-13b-chat-v2": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/TigerResearch/tigerbot-13b-chat-v2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">tigerbot-13b-chat-v2</a>", | |
"organization": "Tigerobo", | |
"vocab_size": 60515, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 28603, | |
"len(zh)": "1,2,16", | |
"num(ja)": 28770, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 167, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 261, | |
"len(ko)": "1,1,1" | |
}, | |
"TigerResearch/tigerbot-70b-chat-v4-4k": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/TigerResearch/tigerbot-70b-chat-v4-4k\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">tigerbot-70b-chat-v4-4k</a>", | |
"organization": "Tigerobo", | |
"vocab_size": 65110, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 30509, | |
"len(zh)": "1,2,16", | |
"num(ja)": 32061, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 2071, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 1504, | |
"len(ko)": "1,1,5" | |
}, | |
"Upstage/SOLAR-10.7B-v1.0": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Upstage/SOLAR-10.7B-v1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">SOLAR-10.7B-v1.0</a>", | |
"organization": "-", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 85, | |
"len(space)": "1,3,15", | |
"num(ar)": 71, | |
"len(ar)": "1,1,2", | |
"num(zh)": 1459, | |
"len(zh)": "1,1,2", | |
"num(ja)": 1593, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 134, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 346, | |
"len(ko)": "1,1,1" | |
}, | |
"WizardLM/WizardCoder-15B-V1.0": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardCoder-15B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardCoder-15B-V1.0</a>", | |
"organization": "Microsoft", | |
"vocab_size": 49153, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 16515, | |
"len(space)": "1,6,256", | |
"num(ar)": 84, | |
"len(ar)": "1,2,4", | |
"num(zh)": 2030, | |
"len(zh)": "1,1,7", | |
"num(ja)": 2368, | |
"len(ja)": "1,1,8", | |
"num(ja-kana)": 360, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 491, | |
"len(ko)": "1,2,5" | |
}, | |
"WizardLM/WizardCoder-Python-7B-V1.0": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardCoder-Python-7B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardCoder-Python-7B-V1.0</a>", | |
"organization": "Microsoft", | |
"vocab_size": 32001, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"WizardLM/WizardLM-7B-V1.0": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardLM-7B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardLM-7B-V1.0</a>", | |
"organization": "Microsoft", | |
"vocab_size": 32001, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"WizardLM/WizardMath-70B-V1.0": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/WizardLM/WizardMath-70B-V1.0\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">WizardMath-70B-V1.0</a>", | |
"organization": "Microsoft", | |
"vocab_size": 32002, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"abeja/gpt-neox-japanese-2.7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/abeja/gpt-neox-japanese-2.7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-neox-japanese-2.7b</a>", | |
"organization": "ABEJA", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 15176, | |
"len(zh)": "1,2,2", | |
"num(ja)": 31482, | |
"len(ja)": "1,2,3", | |
"num(ja-kana)": 16306, | |
"len(ja-kana)": "1,3,3", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"ai21labs/Jamba-v0.1": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ai21labs/Jamba-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Jamba-v0.1</a>", | |
"organization": "AI21", | |
"vocab_size": 65536, | |
"num(digit)": 1556, | |
"len(digit)": "1,16,17", | |
"num(space)": 39501, | |
"len(space)": "1,7,32", | |
"num(ar)": 867, | |
"len(ar)": "1,3,8", | |
"num(zh)": 1157, | |
"len(zh)": "1,1,2", | |
"num(ja)": 1287, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 130, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 312, | |
"len(ko)": "1,1,2" | |
}, | |
"allenai/OLMo-7B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/allenai/OLMo-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">OLMo-7B</a>", | |
"organization": "Allen AI", | |
"vocab_size": 50280, | |
"num(digit)": 2036, | |
"len(digit)": "1,3,35", | |
"num(space)": 29019, | |
"len(space)": "1,7,512", | |
"num(ar)": 94, | |
"len(ar)": "1,2,4", | |
"num(zh)": 313, | |
"len(zh)": "1,1,2", | |
"num(ja)": 480, | |
"len(ja)": "1,1,4", | |
"num(ja-kana)": 167, | |
"len(ja-kana)": "1,1,4", | |
"num(ko)": 25, | |
"len(ko)": "1,1,2" | |
}, | |
"baichuan-inc/Baichuan2-7B-Chat": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">baichuan2</a>", | |
"organization": "Baichuan", | |
"vocab_size": 125696, | |
"num(digit)": 1023, | |
"len(digit)": "1,14,14", | |
"num(space)": 26013, | |
"len(space)": "1,7,32", | |
"num(ar)": 335, | |
"len(ar)": "1,1,27", | |
"num(zh)": 70398, | |
"len(zh)": "1,2,32", | |
"num(ja)": 71269, | |
"len(ja)": "1,2,32", | |
"num(ja-kana)": 206, | |
"len(ja-kana)": "1,1,9", | |
"num(ko)": 1595, | |
"len(ko)": "1,1,2" | |
}, | |
"ckiplab/gpt2-base-chinese": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/ckiplab/gpt2-base-chinese\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2-base-chinese</a>", | |
"organization": "SINICA", | |
"vocab_size": 21128, | |
"num(digit)": 1451, | |
"len(digit)": "1,3,12", | |
"num(space)": 2, | |
"len(space)": "1,2,3", | |
"num(ar)": 30, | |
"len(ar)": "1,2,3", | |
"num(zh)": 14642, | |
"len(zh)": "1,2,3", | |
"num(ja)": 15197, | |
"len(ja)": "1,3,15", | |
"num(ja-kana)": 553, | |
"len(ja-kana)": "1,3,15", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"cyberagent/open-calm-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/cyberagent/open-calm-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">open-calm-7b</a>", | |
"organization": "CyberAgent", | |
"vocab_size": 52000, | |
"num(digit)": 690, | |
"len(digit)": "1,3,5", | |
"num(space)": 1698, | |
"len(space)": "1,4,33", | |
"num(ar)": 10, | |
"len(ar)": "1,1,4", | |
"num(zh)": 30775, | |
"len(zh)": "1,3,31", | |
"num(ja)": 45790, | |
"len(ja)": "1,3,31", | |
"num(ja-kana)": 32535, | |
"len(ja-kana)": "1,3,31", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"databricks/dbrx-instruct": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/databricks/dbrx-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">dbrx-instruct</a>", | |
"organization": "Databricks", | |
"vocab_size": 100280, | |
"num(digit)": 1126, | |
"len(digit)": "1,3,17", | |
"num(space)": 47400, | |
"len(space)": "1,7,128", | |
"num(ar)": 113, | |
"len(ar)": "1,2,10", | |
"num(zh)": 868, | |
"len(zh)": "1,1,7", | |
"num(ja)": 1035, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 169, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 299, | |
"len(ko)": "1,2,4" | |
}, | |
"deepseek-ai/DeepSeek-V2": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/DeepSeek-V2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">DeepSeek-V2</a>", | |
"organization": "DeepSeek", | |
"vocab_size": 100002, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 48073, | |
"len(space)": "1,7,128", | |
"num(ar)": 48, | |
"len(ar)": "1,1,4", | |
"num(zh)": 18052, | |
"len(zh)": "1,2,16", | |
"num(ja)": 18090, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 38, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 16, | |
"len(ko)": "1,1,2" | |
}, | |
"deepseek-ai/deepseek-coder-33b-instruct": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">deepseek-coder-33b-instruct</a>", | |
"organization": "DeepSeek", | |
"vocab_size": 32022, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 15254, | |
"len(space)": "1,6,65", | |
"num(ar)": 12, | |
"len(ar)": "1,1,2", | |
"num(zh)": 4803, | |
"len(zh)": "1,2,4", | |
"num(ja)": 4804, | |
"len(ja)": "1,2,4", | |
"num(ja-kana)": 1, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"deepseek-ai/deepseek-llm-7b-base": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/deepseek-ai/deepseek-llm-7b-base\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">deepseek-llm-7b-base</a>", | |
"organization": "DeepSeek", | |
"vocab_size": 100015, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 48073, | |
"len(space)": "1,7,128", | |
"num(ar)": 48, | |
"len(ar)": "1,1,4", | |
"num(zh)": 18052, | |
"len(zh)": "1,2,16", | |
"num(ja)": 18090, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 38, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 16, | |
"len(ko)": "1,1,2" | |
}, | |
"eson/kplug-base-encoder": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/eson/kplug-base-encoder\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">kplug</a>", | |
"organization": "JD", | |
"vocab_size": 10261, | |
"num(digit)": 420, | |
"len(digit)": "1,3,12", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 5764, | |
"len(zh)": "1,1,1", | |
"num(ja)": 5766, | |
"len(ja)": "1,1,3", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"fnlp/moss-moon-003-sft": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/fnlp/moss-moon-003-sft\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">moss-moon-003-sft</a>", | |
"organization": "Fudan", | |
"vocab_size": 106072, | |
"num(digit)": 1848, | |
"len(digit)": "1,3,16", | |
"num(space)": 33566, | |
"len(space)": "1,7,102", | |
"num(ar)": 25, | |
"len(ar)": "1,1,4", | |
"num(zh)": 54230, | |
"len(zh)": "1,2,15", | |
"num(ja)": 54381, | |
"len(ja)": "1,2,15", | |
"num(ja-kana)": 152, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"google/gemma-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/gemma-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gemma-7b</a>", | |
"organization": "Google", | |
"vocab_size": 256000, | |
"num(digit)": 134, | |
"len(digit)": "1,10,12", | |
"num(space)": 125662, | |
"len(space)": "1,7,31", | |
"num(ar)": 6274, | |
"len(ar)": "1,4,15", | |
"num(zh)": 23767, | |
"len(zh)": "1,2,12", | |
"num(ja)": 28852, | |
"len(ja)": "1,2,12", | |
"num(ja-kana)": 7061, | |
"len(ja-kana)": "1,3,12", | |
"num(ko)": 2295, | |
"len(ko)": "1,1,5" | |
}, | |
"google/switch-c-2048": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/switch-c-2048\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">switch-c-2048</a>", | |
"organization": "Google", | |
"vocab_size": 32100, | |
"num(digit)": 1133, | |
"len(digit)": "1,3,13", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 0, | |
"len(ar)": "-", | |
"num(zh)": 0, | |
"len(zh)": "-", | |
"num(ja)": 0, | |
"len(ja)": "-", | |
"num(ja-kana)": 0, | |
"len(ja-kana)": "-", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"hfl/chinese-alpaca-lora-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-alpaca-lora-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-alpaca-lora-7b</a>", | |
"organization": "-", | |
"vocab_size": 49954, | |
"num(digit)": 614, | |
"len(digit)": "1,3,5", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 17839, | |
"len(zh)": "1,2,13", | |
"num(ja)": 17993, | |
"len(ja)": "1,2,13", | |
"num(ja-kana)": 154, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 135, | |
"len(ko)": "1,1,1" | |
}, | |
"hfl/chinese-llama-2-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-llama-2-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-llama-2-7b</a>", | |
"organization": "-", | |
"vocab_size": 55296, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 23974, | |
"len(zh)": "1,2,16", | |
"num(ja)": 24111, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"hfl/chinese-llama-lora-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/chinese-llama-lora-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">chinese-llama-lora-7b</a>", | |
"organization": "-", | |
"vocab_size": 49953, | |
"num(digit)": 614, | |
"len(digit)": "1,3,5", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 17839, | |
"len(zh)": "1,2,13", | |
"num(ja)": 17993, | |
"len(ja)": "1,2,13", | |
"num(ja-kana)": 154, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 135, | |
"len(ko)": "1,1,1" | |
}, | |
"hfl/llama-3-chinese-8b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hfl/llama-3-chinese-8b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama-3-chinese-8b</a>", | |
"organization": "-", | |
"vocab_size": 128256, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 60860, | |
"len(space)": "1,6,128", | |
"num(ar)": 3810, | |
"len(ar)": "1,4,11", | |
"num(zh)": 4424, | |
"len(zh)": "1,1,7", | |
"num(ja)": 5387, | |
"len(ja)": "1,2,8", | |
"num(ja-kana)": 1086, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 2281, | |
"len(ko)": "1,2,6" | |
}, | |
"hpcai-tech/grok-1": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/hpcai-tech/grok-1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">grok-1</a>", | |
"organization": "xAI", | |
"vocab_size": 131072, | |
"num(digit)": 40, | |
"len(digit)": "1,6,13", | |
"num(space)": 399, | |
"len(space)": "1,3,16", | |
"num(ar)": 69, | |
"len(ar)": "1,2,4", | |
"num(zh)": 1626, | |
"len(zh)": "1,2,7", | |
"num(ja)": 3118, | |
"len(ja)": "1,2,8", | |
"num(ja-kana)": 1908, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 67, | |
"len(ko)": "1,1,2" | |
}, | |
"internlm/internlm-chat-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm-chat-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm-chat-7b</a>", | |
"organization": "Shanghai AI Lab", | |
"vocab_size": 103168, | |
"num(digit)": 1259, | |
"len(digit)": "1,3,19", | |
"num(space)": 33008, | |
"len(space)": "1,6,128", | |
"num(ar)": 6702, | |
"len(ar)": "1,4,16", | |
"num(zh)": 32000, | |
"len(zh)": "1,2,15", | |
"num(ja)": 32866, | |
"len(ja)": "1,2,15", | |
"num(ja-kana)": 864, | |
"len(ja-kana)": "1,2,9", | |
"num(ko)": 298, | |
"len(ko)": "1,1,1" | |
}, | |
"internlm/internlm-xcomposer-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm-xcomposer-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm-xcomposer-7b</a>", | |
"organization": "Shanghai AI Lab", | |
"vocab_size": 103168, | |
"num(digit)": 1261, | |
"len(digit)": "1,3,19", | |
"num(space)": 33008, | |
"len(space)": "1,6,128", | |
"num(ar)": 6702, | |
"len(ar)": "1,4,16", | |
"num(zh)": 32000, | |
"len(zh)": "1,2,15", | |
"num(ja)": 32866, | |
"len(ja)": "1,2,15", | |
"num(ja-kana)": 864, | |
"len(ja-kana)": "1,2,9", | |
"num(ko)": 298, | |
"len(ko)": "1,1,1" | |
}, | |
"internlm/internlm2-chat-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-chat-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm2-chat-7b</a>", | |
"organization": "Shanghai AI Lab", | |
"vocab_size": 92544, | |
"num(digit)": 1261, | |
"len(digit)": "1,3,18", | |
"num(space)": 28681, | |
"len(space)": "1,7,128", | |
"num(ar)": 30, | |
"len(ar)": "1,1,1", | |
"num(zh)": 31148, | |
"len(zh)": "1,2,15", | |
"num(ja)": 31296, | |
"len(ja)": "1,2,15", | |
"num(ja-kana)": 148, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 83, | |
"len(ko)": "1,1,1" | |
}, | |
"internlm/internlm2-math-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/internlm/internlm2-math-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">internlm2-math-7b</a>", | |
"organization": "Shanghai AI Lab", | |
"vocab_size": 92544, | |
"num(digit)": 1261, | |
"len(digit)": "1,3,18", | |
"num(space)": 28681, | |
"len(space)": "1,7,128", | |
"num(ar)": 30, | |
"len(ar)": "1,1,1", | |
"num(zh)": 31148, | |
"len(zh)": "1,2,15", | |
"num(ja)": 31296, | |
"len(ja)": "1,2,15", | |
"num(ja-kana)": 148, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 83, | |
"len(ko)": "1,1,1" | |
}, | |
"microsoft/Phi-3-mini-4k-instruct": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Phi-3-mini-4k-instruct</a>", | |
"organization": "Microsoft", | |
"vocab_size": 32011, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 61, | |
"len(space)": "1,2,15", | |
"num(ar)": 55, | |
"len(ar)": "1,1,2", | |
"num(zh)": 700, | |
"len(zh)": "1,1,1", | |
"num(ja)": 837, | |
"len(ja)": "1,1,1", | |
"num(ja-kana)": 137, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 111, | |
"len(ko)": "1,1,1" | |
}, | |
"microsoft/phi-1": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/phi-1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">phi-1</a>", | |
"organization": "Microsoft", | |
"vocab_size": 50295, | |
"num(digit)": 1691, | |
"len(digit)": "1,3,16", | |
"num(space)": 33129, | |
"len(space)": "1,7,66", | |
"num(ar)": 22, | |
"len(ar)": "1,1,3", | |
"num(zh)": 51, | |
"len(zh)": "1,1,4", | |
"num(ja)": 183, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 133, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"microsoft/phi-2": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/microsoft/phi-2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">phi-2</a>", | |
"organization": "Microsoft", | |
"vocab_size": 50295, | |
"num(digit)": 1691, | |
"len(digit)": "1,3,16", | |
"num(space)": 33129, | |
"len(space)": "1,7,66", | |
"num(ar)": 22, | |
"len(ar)": "1,1,3", | |
"num(zh)": 51, | |
"len(zh)": "1,1,4", | |
"num(ja)": 183, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 133, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"mistralai/Mistral-7B-v0.1": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mistral-7B-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mistral-7B-v0.1</a>", | |
"organization": "Mistral", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 85, | |
"len(space)": "1,3,15", | |
"num(ar)": 71, | |
"len(ar)": "1,1,2", | |
"num(zh)": 1459, | |
"len(zh)": "1,1,2", | |
"num(ja)": 1593, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 134, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 346, | |
"len(ko)": "1,1,1" | |
}, | |
"mistralai/Mixtral-8x7B-v0.1": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mixtral-8x7B-v0.1\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mixtral-8x7B-v0.1</a>", | |
"organization": "Mistral", | |
"vocab_size": 32000, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 85, | |
"len(space)": "1,3,15", | |
"num(ar)": 71, | |
"len(ar)": "1,1,2", | |
"num(zh)": 1459, | |
"len(zh)": "1,1,2", | |
"num(ja)": 1593, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 134, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 346, | |
"len(ko)": "1,1,1" | |
}, | |
"openai-community/gpt2": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/openai-community/gpt2\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt2</a>", | |
"organization": "OpenAI", | |
"vocab_size": 50257, | |
"num(digit)": 1691, | |
"len(digit)": "1,3,16", | |
"num(space)": 33129, | |
"len(space)": "1,7,66", | |
"num(ar)": 22, | |
"len(ar)": "1,1,3", | |
"num(zh)": 51, | |
"len(zh)": "1,1,4", | |
"num(ja)": 183, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 133, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"openai/code-davinci-002": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">code-davinci-002</a>", | |
"organization": "OpenAI", | |
"vocab_size": 50281, | |
"num(digit)": 1691, | |
"len(digit)": "1,3,16", | |
"num(space)": 33175, | |
"len(space)": "1,7,66", | |
"num(ar)": 22, | |
"len(ar)": "1,1,3", | |
"num(zh)": 51, | |
"len(zh)": "1,1,4", | |
"num(ja)": 183, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 133, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"openai/gpt-3.5-turbo": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-3.5-turbo</a>", | |
"organization": "OpenAI", | |
"vocab_size": 100277, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 47472, | |
"len(space)": "1,7,128", | |
"num(ar)": 113, | |
"len(ar)": "1,2,10", | |
"num(zh)": 868, | |
"len(zh)": "1,1,7", | |
"num(ja)": 1035, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 169, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 299, | |
"len(ko)": "1,2,4" | |
}, | |
"openai/gpt-4o": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gpt-4o</a>", | |
"organization": "OpenAI", | |
"vocab_size": 200019, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 109316, | |
"len(space)": "1,6,128", | |
"num(ar)": 8055, | |
"len(ar)": "1,4,12", | |
"num(zh)": 7563, | |
"len(zh)": "1,2,11", | |
"num(ja)": 8292, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 809, | |
"len(ja-kana)": "1,2,11", | |
"num(ko)": 2365, | |
"len(ko)": "1,2,8" | |
}, | |
"openai/text-davinci-003": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://github.com/openai/tiktoken\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">text-davinci-003</a>", | |
"organization": "OpenAI", | |
"vocab_size": 50281, | |
"num(digit)": 1691, | |
"len(digit)": "1,3,16", | |
"num(space)": 33175, | |
"len(space)": "1,7,66", | |
"num(ar)": 22, | |
"len(ar)": "1,1,3", | |
"num(zh)": 51, | |
"len(zh)": "1,1,4", | |
"num(ja)": 183, | |
"len(ja)": "1,1,7", | |
"num(ja-kana)": 133, | |
"len(ja-kana)": "1,1,7", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"thu-coai/CharacterGLM-6B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/thu-coai/CharacterGLM-6B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">CharacterGLM-6B</a>", | |
"organization": "Tsinghua", | |
"vocab_size": 64789, | |
"num(digit)": 20, | |
"len(digit)": "1,1,1", | |
"num(space)": 67, | |
"len(space)": "1,2,15", | |
"num(ar)": 57, | |
"len(ar)": "1,1,2", | |
"num(zh)": 30922, | |
"len(zh)": "1,2,16", | |
"num(ja)": 31065, | |
"len(ja)": "1,2,16", | |
"num(ja-kana)": 143, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 604, | |
"len(ko)": "1,1,1" | |
}, | |
"tiiuae/falcon-180b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tiiuae/falcon-180b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">falcon-180b</a>", | |
"organization": "TII", | |
"vocab_size": 65024, | |
"num(digit)": 1108, | |
"len(digit)": "1,3,3", | |
"num(space)": 40202, | |
"len(space)": "1,7,65", | |
"num(ar)": 21, | |
"len(ar)": "1,1,4", | |
"num(zh)": 1627, | |
"len(zh)": "1,1,3", | |
"num(ja)": 1652, | |
"len(ja)": "1,1,3", | |
"num(ja-kana)": 25, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 1, | |
"len(ko)": "1,1,1" | |
}, | |
"tiiuae/falcon-7b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/tiiuae/falcon-7b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">falcon-7b</a>", | |
"organization": "TII", | |
"vocab_size": 65024, | |
"num(digit)": 1108, | |
"len(digit)": "1,3,3", | |
"num(space)": 40202, | |
"len(space)": "1,7,65", | |
"num(ar)": 21, | |
"len(ar)": "1,1,4", | |
"num(zh)": 1627, | |
"len(zh)": "1,1,3", | |
"num(ja)": 1652, | |
"len(ja)": "1,1,3", | |
"num(ja-kana)": 25, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 1, | |
"len(ko)": "1,1,1" | |
}, | |
"Qwen/Qwen1.5-1.8B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-1.8B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5-1.8B</a>", | |
"organization": "Alibaba", | |
"vocab_size": 151646, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 55883, | |
"len(space)": "1,6,128", | |
"num(ar)": 4018, | |
"len(ar)": "1,3,12", | |
"num(zh)": 25557, | |
"len(zh)": "1,2,7", | |
"num(ja)": 27206, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 2089, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 3495, | |
"len(ko)": "1,1,5" | |
}, | |
"Qwen/Qwen1.5-110B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-110B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5-110B</a>", | |
"organization": "Alibaba", | |
"vocab_size": 151646, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 55883, | |
"len(space)": "1,6,128", | |
"num(ar)": 4018, | |
"len(ar)": "1,3,12", | |
"num(zh)": 25557, | |
"len(zh)": "1,2,7", | |
"num(ja)": 27206, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 2089, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 3495, | |
"len(ko)": "1,1,5" | |
}, | |
"Qwen/Qwen1.5-14B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen1.5-14B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen1.5-14B</a>", | |
"organization": "Alibaba", | |
"vocab_size": 151646, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 55883, | |
"len(space)": "1,6,128", | |
"num(ar)": 4018, | |
"len(ar)": "1,3,12", | |
"num(zh)": 25557, | |
"len(zh)": "1,2,7", | |
"num(ja)": 27206, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 2089, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 3495, | |
"len(ko)": "1,1,5" | |
}, | |
"asafaya/bert-base-arabic": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/asafaya/bert-base-arabic\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bert-base-arabic</a>", | |
"organization": "-", | |
"vocab_size": 32000, | |
"num(digit)": 507, | |
"len(digit)": "1,3,21", | |
"num(space)": 0, | |
"len(space)": "-", | |
"num(ar)": 28367, | |
"len(ar)": "1,5,34", | |
"num(zh)": 180, | |
"len(zh)": "1,1,1", | |
"num(ja)": 333, | |
"len(ja)": "1,1,3", | |
"num(ja-kana)": 153, | |
"len(ja-kana)": "1,1,3", | |
"num(ko)": 0, | |
"len(ko)": "-" | |
}, | |
"rinna/bilingual-gpt-neox-4b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/rinna/bilingual-gpt-neox-4b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">bilingual-gpt-neox-4b</a>", | |
"organization": "ABEJA", | |
"vocab_size": 65536, | |
"num(digit)": 266, | |
"len(digit)": "1,6,6", | |
"num(space)": 3, | |
"len(space)": "1,1,1", | |
"num(ar)": 108, | |
"len(ar)": "1,1,4", | |
"num(zh)": 30158, | |
"len(zh)": "1,2,16", | |
"num(ja)": 40298, | |
"len(ja)": "1,3,16", | |
"num(ja-kana)": 21366, | |
"len(ja-kana)": "1,4,16", | |
"num(ko)": 384, | |
"len(ko)": "1,1,1" | |
}, | |
"01-ai/Yi-1.5-34B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/01-ai/Yi-1.5-34B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Yi-1.5-34B</a>", | |
"organization": "Yi", | |
"vocab_size": 63992, | |
"num(digit)": 195, | |
"len(digit)": "1,13,13", | |
"num(space)": 43, | |
"len(space)": "1,2,15", | |
"num(ar)": 18, | |
"len(ar)": "1,1,4", | |
"num(zh)": 21350, | |
"len(zh)": "1,2,12", | |
"num(ja)": 21401, | |
"len(ja)": "1,2,12", | |
"num(ja-kana)": 51, | |
"len(ja-kana)": "1,1,2", | |
"num(ko)": 28, | |
"len(ko)": "1,1,2" | |
}, | |
"Qwen/Qwen2-72B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/Qwen/Qwen2-72B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Qwen2-72B</a>", | |
"organization": "Alibaba", | |
"vocab_size": 151646, | |
"num(digit)": 10, | |
"len(digit)": "1,1,1", | |
"num(space)": 55883, | |
"len(space)": "1,6,128", | |
"num(ar)": 4018, | |
"len(ar)": "1,3,12", | |
"num(zh)": 25557, | |
"len(zh)": "1,2,7", | |
"num(ja)": 27206, | |
"len(ja)": "1,2,11", | |
"num(ja-kana)": 2089, | |
"len(ja-kana)": "1,3,11", | |
"num(ko)": 3495, | |
"len(ko)": "1,1,5" | |
}, | |
"apple/DCLM-7B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/apple/DCLM-7B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">DCLM-7B</a>", | |
"organization": "Apple", | |
"vocab_size": 50277, | |
"num(digit)": 2036, | |
"len(digit)": "1,3,35", | |
"num(space)": 28996, | |
"len(space)": "1,7,512", | |
"num(ar)": 94, | |
"len(ar)": "1,2,4", | |
"num(zh)": 313, | |
"len(zh)": "1,1,2", | |
"num(ja)": 480, | |
"len(ja)": "1,1,4", | |
"num(ja-kana)": 167, | |
"len(ja-kana)": "1,1,4", | |
"num(ko)": 25, | |
"len(ko)": "1,1,2" | |
}, | |
"google/gemma-2-9b": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/google/gemma-2-9b\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">gemma-2-9b</a>", | |
"organization": "Google", | |
"vocab_size": 256000, | |
"num(digit)": 134, | |
"len(digit)": "1,10,12", | |
"num(space)": 125662, | |
"len(space)": "1,7,31", | |
"num(ar)": 6274, | |
"len(ar)": "1,4,15", | |
"num(zh)": 23767, | |
"len(zh)": "1,2,12", | |
"num(ja)": 28852, | |
"len(ja)": "1,2,12", | |
"num(ja-kana)": 7061, | |
"len(ja-kana)": "1,3,12", | |
"num(ko)": 2295, | |
"len(ko)": "1,1,5" | |
}, | |
"meta-llama/Meta-Llama-3.1-405B": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/meta-llama/Meta-Llama-3.1-405B\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">llama3.1</a>", | |
"organization": "Meta", | |
"vocab_size": 128256, | |
"num(digit)": 1110, | |
"len(digit)": "1,3,3", | |
"num(space)": 60860, | |
"len(space)": "1,6,128", | |
"num(ar)": 3810, | |
"len(ar)": "1,4,11", | |
"num(zh)": 4424, | |
"len(zh)": "1,1,7", | |
"num(ja)": 5387, | |
"len(ja)": "1,2,8", | |
"num(ja-kana)": 1086, | |
"len(ja-kana)": "1,2,8", | |
"num(ko)": 2281, | |
"len(ko)": "1,2,6" | |
}, | |
"mistralai/Mistral-Large-Instruct-2407": { | |
"tokenizer": "<a target=\"_blank\" href=\"https://huggingface.co/mistralai/Mistral-Large-Instruct-2407\" style=\"color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;\">Mistral-Large-Instruct-2407</a>", | |
"organization": "Mistral", | |
"vocab_size": 32768, | |
"num(digit)": 775, | |
"len(digit)": "1,13,18", | |
"num(space)": 15823, | |
"len(space)": "1,6,16", | |
"num(ar)": 71, | |
"len(ar)": "1,1,3", | |
"num(zh)": 1459, | |
"len(zh)": "1,1,2", | |
"num(ja)": 1593, | |
"len(ja)": "1,1,2", | |
"num(ja-kana)": 134, | |
"len(ja-kana)": "1,1,1", | |
"num(ko)": 346, | |
"len(ko)": "1,1,1" | |
} | |
} |