tokenizer-arena / stats /compress_rate.json
xu-song's picture
add compression leaderboard
1b7fc74
raw
history blame
40.9 kB
{
"amber.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"aya_101.cc100-en": {
"vocab_size": 250100,
"n_bytes": 1124813,
"n_tokens": 317881,
"n_chars": 1121360
},
"baichuan.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 280108,
"n_chars": 1121360
},
"baichuan2.cc100-en": {
"vocab_size": 125696,
"n_bytes": 1124813,
"n_tokens": 269011,
"n_chars": 1121360
},
"bert_base_cased.cc100-en": {
"vocab_size": 28996,
"n_bytes": 1124813,
"n_tokens": 288022,
"n_chars": 1121360
},
"bert_base_chinese.cc100-en": {
"vocab_size": 21128,
"n_bytes": 1124813,
"n_tokens": 377068,
"n_chars": 1121360
},
"bert_base_uncased.cc100-en": {
"vocab_size": 30522,
"n_bytes": 1124813,
"n_tokens": 280575,
"n_chars": 1121360
},
"bloom.cc100-en": {
"vocab_size": 250680,
"n_bytes": 1124813,
"n_tokens": 257405,
"n_chars": 1121360
},
"byt5_small.cc100-en": {
"vocab_size": 384,
"n_bytes": 1124813,
"n_tokens": 1134813,
"n_chars": 1121360
},
"character_glm_6b.cc100-en": {
"vocab_size": 64789,
"n_bytes": 1124813,
"n_tokens": 289347,
"n_chars": 1121360
},
"chatglm2_6b.cc100-en": {
"vocab_size": 64787,
"n_bytes": 1124813,
"n_tokens": 289329,
"n_chars": 1121360
},
"chatglm3_6b.cc100-en": {
"vocab_size": 64796,
"n_bytes": 1124813,
"n_tokens": 289347,
"n_chars": 1121360
},
"chatglm_6b.cc100-en": {
"vocab_size": 150344,
"n_bytes": 1124813,
"n_tokens": 284761,
"n_chars": 1121360
},
"chatyuan_large_v2.cc100-en": {
"vocab_size": 32128,
"n_bytes": 1124813,
"n_tokens": 536033,
"n_chars": 1121360
},
"chinese_llama.cc100-en": {
"vocab_size": 49953,
"n_bytes": 1124813,
"n_tokens": 291514,
"n_chars": 1121360
},
"chinese_llama2.cc100-en": {
"vocab_size": 55296,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"code_davinci_002.cc100-en": {
"vocab_size": 50281,
"n_bytes": 1124813,
"n_tokens": 258403,
"n_chars": 1121360
},
"crystal_coder.cc100-en": {
"vocab_size": 32022,
"n_bytes": 1124813,
"n_tokens": 284627,
"n_chars": 1121360
},
"dbrx_instruct.cc100-en": {
"vocab_size": 100280,
"n_bytes": 1124813,
"n_tokens": 254985,
"n_chars": 1121360
},
"deepseek_coder_33b_instruct.cc100-en": {
"vocab_size": 32022,
"n_bytes": 1124813,
"n_tokens": 287408,
"n_chars": 1121360
},
"deepseek_llm_7b_base.cc100-en": {
"vocab_size": 100015,
"n_bytes": 1124813,
"n_tokens": 272324,
"n_chars": 1121360
},
"falcon_180b.cc100-en": {
"vocab_size": 65024,
"n_bytes": 1124813,
"n_tokens": 262509,
"n_chars": 1121360
},
"falcon_7b.cc100-en": {
"vocab_size": 65024,
"n_bytes": 1124813,
"n_tokens": 262509,
"n_chars": 1121360
},
"fastchat_t5_3b.cc100-en": {
"vocab_size": 32110,
"n_bytes": 1124813,
"n_tokens": 484941,
"n_chars": 1121360
},
"flan_t5_base.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"gemma_7b.cc100-en": {
"vocab_size": 256000,
"n_bytes": 1124813,
"n_tokens": 268010,
"n_chars": 1121360
},
"gpt2.cc100-en": {
"vocab_size": 50257,
"n_bytes": 1124813,
"n_tokens": 258428,
"n_chars": 1121360
},
"gpt2_chinese.cc100-en": {
"vocab_size": 21128,
"n_bytes": 1124813,
"n_tokens": 392641,
"n_chars": 1121360
},
"gpt_35_turbo.cc100-en": {
"vocab_size": 100277,
"n_bytes": 1124813,
"n_tokens": 254985,
"n_chars": 1121360
},
"gpt_4.cc100-en": {
"vocab_size": 100277,
"n_bytes": 1124813,
"n_tokens": 254985,
"n_chars": 1121360
},
"gpt_nexo_20b.cc100-en": {
"vocab_size": 50277,
"n_bytes": 1124813,
"n_tokens": 259357,
"n_chars": 1121360
},
"grok_1.cc100-en": {
"vocab_size": 131072,
"n_bytes": 1124813,
"n_tokens": 258048,
"n_chars": 1121360
},
"internlm2_chat_7b.cc100-en": {
"vocab_size": 92544,
"n_bytes": 1124813,
"n_tokens": 271583,
"n_chars": 1121360
},
"internlm2_math_7b.cc100-en": {
"vocab_size": 92544,
"n_bytes": 1124813,
"n_tokens": 271583,
"n_chars": 1121360
},
"internlm_chat_7b.cc100-en": {
"vocab_size": 103168,
"n_bytes": 1124813,
"n_tokens": 271293,
"n_chars": 1121360
},
"internlm_xcomposer_7b.cc100-en": {
"vocab_size": 103168,
"n_bytes": 1124813,
"n_tokens": 271293,
"n_chars": 1121360
},
"jamba_v0_1.cc100-en": {
"vocab_size": 65536,
"n_bytes": 1124813,
"n_tokens": 274242,
"n_chars": 1121360
},
"kplug.cc100-en": {
"vocab_size": 10261,
"n_bytes": 1124813,
"n_tokens": 393564,
"n_chars": 1121360
},
"llama.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"llama2.cc100-en": {
"vocab_size": 32001,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"llama3.cc100-en": {
"vocab_size": 128256,
"n_bytes": 1124813,
"n_tokens": 254944,
"n_chars": 1121360
},
"mistral_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"mixtral_8_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"mobilebert_uncased.cc100-en": {
"vocab_size": 30522,
"n_bytes": 1124813,
"n_tokens": 280575,
"n_chars": 1121360
},
"moss.cc100-en": {
"vocab_size": 106072,
"n_bytes": 1124813,
"n_tokens": 257070,
"n_chars": 1121360
},
"mt5_large.cc100-en": {
"vocab_size": 250100,
"n_bytes": 1124813,
"n_tokens": 317881,
"n_chars": 1121360
},
"olmo_7b.cc100-en": {
"vocab_size": 50280,
"n_bytes": 1124813,
"n_tokens": 259357,
"n_chars": 1121360
},
"orion_14b_chat.cc100-en": {
"vocab_size": 84608,
"n_bytes": 1124813,
"n_tokens": 265948,
"n_chars": 1121360
},
"phi_1.cc100-en": {
"vocab_size": 50295,
"n_bytes": 1124813,
"n_tokens": 258409,
"n_chars": 1121360
},
"phi_2.cc100-en": {
"vocab_size": 50295,
"n_bytes": 1124813,
"n_tokens": 258409,
"n_chars": 1121360
},
"phi_3_mini.cc100-en": {
"vocab_size": 32011,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"pko_t5_large.cc100-en": {
"vocab_size": 50358,
"n_bytes": 1124813,
"n_tokens": 658985,
"n_chars": 1121360
},
"prompt_clue.cc100-en": {
"vocab_size": 32128,
"n_bytes": 1124813,
"n_tokens": 536033,
"n_chars": 1121360
},
"qwen1_5_14b_chat.cc100-en": {
"vocab_size": 151646,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"qwen_1_8b_chat.cc100-en": {
"vocab_size": 151851,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"qwen_72b_chat.cc100-en": {
"vocab_size": 151851,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"qwen_7b_chat.cc100-en": {
"vocab_size": 151851,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"roberta_chinese_clue.cc100-en": {
"vocab_size": 8021,
"n_bytes": 1124813,
"n_tokens": 583058,
"n_chars": 1121360
},
"skywork_13b_base.cc100-en": {
"vocab_size": 65519,
"n_bytes": 1124813,
"n_tokens": 294617,
"n_chars": 1121360
},
"skywork_13b_math.cc100-en": {
"vocab_size": 65519,
"n_bytes": 1124813,
"n_tokens": 294617,
"n_chars": 1121360
},
"solar_10_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"starchat_alpha.cc100-en": {
"vocab_size": 49156,
"n_bytes": 1124813,
"n_tokens": 288965,
"n_chars": 1121360
},
"switch_c_2048.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"t5_base.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"t5_large.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"t5_small.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"text_davinci_003.cc100-en": {
"vocab_size": 50281,
"n_bytes": 1124813,
"n_tokens": 258403,
"n_chars": 1121360
},
"tigerbot_13b_chat_v2.cc100-en": {
"vocab_size": 60515,
"n_bytes": 1124813,
"n_tokens": 285652,
"n_chars": 1121360
},
"tigerbot_70b_chat_v4_4k.cc100-en": {
"vocab_size": 65110,
"n_bytes": 1124813,
"n_tokens": 286946,
"n_chars": 1121360
},
"wizardcoder_15b_v1.cc100-en": {
"vocab_size": 49153,
"n_bytes": 1124813,
"n_tokens": 288965,
"n_chars": 1121360
},
"wizardcoder_python_7b_v1.cc100-en": {
"vocab_size": 32001,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"wizardlm_7b_v1.cc100-en": {
"vocab_size": 32001,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"wizardmath_70b_v1.cc100-en": {
"vocab_size": 32002,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"xlm_roberta.cc100-en": {
"vocab_size": 250002,
"n_bytes": 1124813,
"n_tokens": 300026,
"n_chars": 1121360
},
"yi_34b.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 270400,
"n_chars": 1121360
},
"yi_6b.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 270400,
"n_chars": 1121360
},
"yi_vl34b.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 269738,
"n_chars": 1121360
},
"zephyr_7b_beta.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"amber.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"aya_101.cc100-zh-Hans": {
"vocab_size": 250100,
"n_bytes": 2633047,
"n_tokens": 631182,
"n_chars": 927311
},
"baichuan.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 626117,
"n_chars": 927311
},
"baichuan2.cc100-zh-Hans": {
"vocab_size": 125696,
"n_bytes": 2633047,
"n_tokens": 541464,
"n_chars": 927311
},
"bert_base_cased.cc100-zh-Hans": {
"vocab_size": 28996,
"n_bytes": 2633047,
"n_tokens": 899709,
"n_chars": 927311
},
"bert_base_chinese.cc100-zh-Hans": {
"vocab_size": 21128,
"n_bytes": 2633047,
"n_tokens": 896599,
"n_chars": 927311
},
"bert_base_uncased.cc100-zh-Hans": {
"vocab_size": 30522,
"n_bytes": 2633047,
"n_tokens": 898554,
"n_chars": 927311
},
"bloom.cc100-zh-Hans": {
"vocab_size": 250680,
"n_bytes": 2633047,
"n_tokens": 573008,
"n_chars": 927311
},
"byt5_small.cc100-zh-Hans": {
"vocab_size": 384,
"n_bytes": 2633047,
"n_tokens": 2643047,
"n_chars": 927311
},
"character_glm_6b.cc100-zh-Hans": {
"vocab_size": 64789,
"n_bytes": 2633047,
"n_tokens": 583646,
"n_chars": 927311
},
"chatglm2_6b.cc100-zh-Hans": {
"vocab_size": 64787,
"n_bytes": 2633047,
"n_tokens": 583646,
"n_chars": 927311
},
"chatglm3_6b.cc100-zh-Hans": {
"vocab_size": 64796,
"n_bytes": 2633047,
"n_tokens": 583646,
"n_chars": 927311
},
"chatglm_6b.cc100-zh-Hans": {
"vocab_size": 150344,
"n_bytes": 2633047,
"n_tokens": 527384,
"n_chars": 927311
},
"chatyuan_large_v2.cc100-zh-Hans": {
"vocab_size": 32128,
"n_bytes": 2633047,
"n_tokens": 564905,
"n_chars": 927311
},
"chinese_llama.cc100-zh-Hans": {
"vocab_size": 49953,
"n_bytes": 2633047,
"n_tokens": 623219,
"n_chars": 927311
},
"chinese_llama2.cc100-zh-Hans": {
"vocab_size": 55296,
"n_bytes": 2633047,
"n_tokens": 625766,
"n_chars": 927311
},
"code_davinci_002.cc100-zh-Hans": {
"vocab_size": 50281,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"crystal_coder.cc100-zh-Hans": {
"vocab_size": 32022,
"n_bytes": 2633047,
"n_tokens": 1320093,
"n_chars": 927311
},
"dbrx_instruct.cc100-zh-Hans": {
"vocab_size": 100280,
"n_bytes": 2633047,
"n_tokens": 1084939,
"n_chars": 927311
},
"deepseek_coder_33b_instruct.cc100-zh-Hans": {
"vocab_size": 32022,
"n_bytes": 2633047,
"n_tokens": 720577,
"n_chars": 927311
},
"deepseek_llm_7b_base.cc100-zh-Hans": {
"vocab_size": 100015,
"n_bytes": 2633047,
"n_tokens": 605081,
"n_chars": 927311
},
"falcon_180b.cc100-zh-Hans": {
"vocab_size": 65024,
"n_bytes": 2633047,
"n_tokens": 1124681,
"n_chars": 927311
},
"falcon_7b.cc100-zh-Hans": {
"vocab_size": 65024,
"n_bytes": 2633047,
"n_tokens": 1124681,
"n_chars": 927311
},
"fastchat_t5_3b.cc100-zh-Hans": {
"vocab_size": 32110,
"n_bytes": 2633047,
"n_tokens": 178974,
"n_chars": 927311
},
"flan_t5_base.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173520,
"n_chars": 927311
},
"gemma_7b.cc100-zh-Hans": {
"vocab_size": 256000,
"n_bytes": 2633047,
"n_tokens": 641795,
"n_chars": 927311
},
"gpt2.cc100-zh-Hans": {
"vocab_size": 50257,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"gpt2_chinese.cc100-zh-Hans": {
"vocab_size": 21128,
"n_bytes": 2633047,
"n_tokens": 899506,
"n_chars": 927311
},
"gpt_35_turbo.cc100-zh-Hans": {
"vocab_size": 100277,
"n_bytes": 2633047,
"n_tokens": 1084939,
"n_chars": 927311
},
"gpt_4.cc100-zh-Hans": {
"vocab_size": 100277,
"n_bytes": 2633047,
"n_tokens": 1084939,
"n_chars": 927311
},
"gpt_nexo_20b.cc100-zh-Hans": {
"vocab_size": 50277,
"n_bytes": 2633047,
"n_tokens": 1220529,
"n_chars": 927311
},
"grok_1.cc100-zh-Hans": {
"vocab_size": 131072,
"n_bytes": 2633047,
"n_tokens": 1414508,
"n_chars": 927311
},
"internlm2_chat_7b.cc100-zh-Hans": {
"vocab_size": 92544,
"n_bytes": 2633047,
"n_tokens": 579976,
"n_chars": 927311
},
"internlm2_math_7b.cc100-zh-Hans": {
"vocab_size": 92544,
"n_bytes": 2633047,
"n_tokens": 579976,
"n_chars": 927311
},
"internlm_chat_7b.cc100-zh-Hans": {
"vocab_size": 103168,
"n_bytes": 2633047,
"n_tokens": 579109,
"n_chars": 927311
},
"internlm_xcomposer_7b.cc100-zh-Hans": {
"vocab_size": 103168,
"n_bytes": 2633047,
"n_tokens": 579109,
"n_chars": 927311
},
"jamba_v0_1.cc100-zh-Hans": {
"vocab_size": 65536,
"n_bytes": 2633047,
"n_tokens": 1067054,
"n_chars": 927311
},
"kplug.cc100-zh-Hans": {
"vocab_size": 10261,
"n_bytes": 2633047,
"n_tokens": 902451,
"n_chars": 927311
},
"llama.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"llama2.cc100-zh-Hans": {
"vocab_size": 32001,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"llama3.cc100-zh-Hans": {
"vocab_size": 128256,
"n_bytes": 2633047,
"n_tokens": 747405,
"n_chars": 927311
},
"mistral_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"mixtral_8_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"mobilebert_uncased.cc100-zh-Hans": {
"vocab_size": 30522,
"n_bytes": 2633047,
"n_tokens": 898554,
"n_chars": 927311
},
"moss.cc100-zh-Hans": {
"vocab_size": 106072,
"n_bytes": 2633047,
"n_tokens": 557455,
"n_chars": 927311
},
"mt5_large.cc100-zh-Hans": {
"vocab_size": 250100,
"n_bytes": 2633047,
"n_tokens": 631182,
"n_chars": 927311
},
"olmo_7b.cc100-zh-Hans": {
"vocab_size": 50280,
"n_bytes": 2633047,
"n_tokens": 1220529,
"n_chars": 927311
},
"orion_14b_chat.cc100-zh-Hans": {
"vocab_size": 84608,
"n_bytes": 2633047,
"n_tokens": 529926,
"n_chars": 927311
},
"phi_1.cc100-zh-Hans": {
"vocab_size": 50295,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"phi_2.cc100-zh-Hans": {
"vocab_size": 50295,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"phi_3_mini.cc100-zh-Hans": {
"vocab_size": 32011,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"pko_t5_large.cc100-zh-Hans": {
"vocab_size": 50358,
"n_bytes": 2633047,
"n_tokens": 2533519,
"n_chars": 927311
},
"prompt_clue.cc100-zh-Hans": {
"vocab_size": 32128,
"n_bytes": 2633047,
"n_tokens": 564905,
"n_chars": 927311
},
"qwen1_5_14b_chat.cc100-zh-Hans": {
"vocab_size": 151646,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"qwen_1_8b_chat.cc100-zh-Hans": {
"vocab_size": 151851,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"qwen_72b_chat.cc100-zh-Hans": {
"vocab_size": 151851,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"qwen_7b_chat.cc100-zh-Hans": {
"vocab_size": 151851,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"roberta_chinese_clue.cc100-zh-Hans": {
"vocab_size": 8021,
"n_bytes": 2633047,
"n_tokens": 907144,
"n_chars": 927311
},
"skywork_13b_base.cc100-zh-Hans": {
"vocab_size": 65519,
"n_bytes": 2633047,
"n_tokens": 663923,
"n_chars": 927311
},
"skywork_13b_math.cc100-zh-Hans": {
"vocab_size": 65519,
"n_bytes": 2633047,
"n_tokens": 663923,
"n_chars": 927311
},
"solar_10_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"starchat_alpha.cc100-zh-Hans": {
"vocab_size": 49156,
"n_bytes": 2633047,
"n_tokens": 882018,
"n_chars": 927311
},
"switch_c_2048.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"t5_base.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"t5_large.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"t5_small.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"text_davinci_003.cc100-zh-Hans": {
"vocab_size": 50281,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"tigerbot_13b_chat_v2.cc100-zh-Hans": {
"vocab_size": 60515,
"n_bytes": 2633047,
"n_tokens": 577385,
"n_chars": 927311
},
"tigerbot_70b_chat_v4_4k.cc100-zh-Hans": {
"vocab_size": 65110,
"n_bytes": 2633047,
"n_tokens": 577211,
"n_chars": 927311
},
"wizardcoder_15b_v1.cc100-zh-Hans": {
"vocab_size": 49153,
"n_bytes": 2633047,
"n_tokens": 882018,
"n_chars": 927311
},
"wizardcoder_python_7b_v1.cc100-zh-Hans": {
"vocab_size": 32001,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"wizardlm_7b_v1.cc100-zh-Hans": {
"vocab_size": 32001,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"wizardmath_70b_v1.cc100-zh-Hans": {
"vocab_size": 32002,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"xlm_roberta.cc100-zh-Hans": {
"vocab_size": 250002,
"n_bytes": 2633047,
"n_tokens": 619844,
"n_chars": 927311
},
"yi_34b.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 588729,
"n_chars": 927311
},
"yi_6b.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 588729,
"n_chars": 927311
},
"yi_vl34b.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 596166,
"n_chars": 927311
},
"zephyr_7b_beta.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"amber.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"aya_101.cc100-es": {
"vocab_size": 250100,
"n_bytes": 1664455,
"n_tokens": 472231,
"n_chars": 1630297
},
"baichuan.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 585804,
"n_chars": 1630297
},
"baichuan2.cc100-es": {
"vocab_size": 125696,
"n_bytes": 1664455,
"n_tokens": 551326,
"n_chars": 1630297
},
"bert_base_cased.cc100-es": {
"vocab_size": 28996,
"n_bytes": 1664455,
"n_tokens": 630231,
"n_chars": 1630297
},
"bert_base_chinese.cc100-es": {
"vocab_size": 21128,
"n_bytes": 1664455,
"n_tokens": 609419,
"n_chars": 1630297
},
"bert_base_uncased.cc100-es": {
"vocab_size": 30522,
"n_bytes": 1664455,
"n_tokens": 558042,
"n_chars": 1630297
},
"bloom.cc100-es": {
"vocab_size": 250680,
"n_bytes": 1664455,
"n_tokens": 350793,
"n_chars": 1630297
},
"byt5_small.cc100-es": {
"vocab_size": 384,
"n_bytes": 1664455,
"n_tokens": 1674455,
"n_chars": 1630297
},
"character_glm_6b.cc100-es": {
"vocab_size": 64789,
"n_bytes": 1664455,
"n_tokens": 566501,
"n_chars": 1630297
},
"chatglm2_6b.cc100-es": {
"vocab_size": 64787,
"n_bytes": 1664455,
"n_tokens": 566476,
"n_chars": 1630297
},
"chatglm3_6b.cc100-es": {
"vocab_size": 64796,
"n_bytes": 1664455,
"n_tokens": 566501,
"n_chars": 1630297
},
"chatglm_6b.cc100-es": {
"vocab_size": 150344,
"n_bytes": 1664455,
"n_tokens": 514848,
"n_chars": 1630297
},
"chatyuan_large_v2.cc100-es": {
"vocab_size": 32128,
"n_bytes": 1664455,
"n_tokens": 889530,
"n_chars": 1630297
},
"chinese_llama.cc100-es": {
"vocab_size": 49953,
"n_bytes": 1664455,
"n_tokens": 486672,
"n_chars": 1630297
},
"chinese_llama2.cc100-es": {
"vocab_size": 55296,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"code_davinci_002.cc100-es": {
"vocab_size": 50281,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"crystal_coder.cc100-es": {
"vocab_size": 32022,
"n_bytes": 1664455,
"n_tokens": 482235,
"n_chars": 1630297
},
"dbrx_instruct.cc100-es": {
"vocab_size": 100280,
"n_bytes": 1664455,
"n_tokens": 433875,
"n_chars": 1630297
},
"deepseek_coder_33b_instruct.cc100-es": {
"vocab_size": 32022,
"n_bytes": 1664455,
"n_tokens": 523884,
"n_chars": 1630297
},
"deepseek_llm_7b_base.cc100-es": {
"vocab_size": 100015,
"n_bytes": 1664455,
"n_tokens": 480877,
"n_chars": 1630297
},
"falcon_180b.cc100-es": {
"vocab_size": 65024,
"n_bytes": 1664455,
"n_tokens": 442138,
"n_chars": 1630297
},
"falcon_7b.cc100-es": {
"vocab_size": 65024,
"n_bytes": 1664455,
"n_tokens": 442138,
"n_chars": 1630297
},
"fastchat_t5_3b.cc100-es": {
"vocab_size": 32110,
"n_bytes": 1664455,
"n_tokens": 970105,
"n_chars": 1630297
},
"flan_t5_base.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706405,
"n_chars": 1630297
},
"gemma_7b.cc100-es": {
"vocab_size": 256000,
"n_bytes": 1664455,
"n_tokens": 371321,
"n_chars": 1630297
},
"gpt2.cc100-es": {
"vocab_size": 50257,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"gpt2_chinese.cc100-es": {
"vocab_size": 21128,
"n_bytes": 1664455,
"n_tokens": 703390,
"n_chars": 1630297
},
"gpt_35_turbo.cc100-es": {
"vocab_size": 100277,
"n_bytes": 1664455,
"n_tokens": 433875,
"n_chars": 1630297
},
"gpt_4.cc100-es": {
"vocab_size": 100277,
"n_bytes": 1664455,
"n_tokens": 433875,
"n_chars": 1630297
},
"gpt_nexo_20b.cc100-es": {
"vocab_size": 50277,
"n_bytes": 1664455,
"n_tokens": 494577,
"n_chars": 1630297
},
"grok_1.cc100-es": {
"vocab_size": 131072,
"n_bytes": 1664455,
"n_tokens": 449392,
"n_chars": 1630297
},
"internlm2_chat_7b.cc100-es": {
"vocab_size": 92544,
"n_bytes": 1664455,
"n_tokens": 518871,
"n_chars": 1630297
},
"internlm2_math_7b.cc100-es": {
"vocab_size": 92544,
"n_bytes": 1664455,
"n_tokens": 518871,
"n_chars": 1630297
},
"internlm_chat_7b.cc100-es": {
"vocab_size": 103168,
"n_bytes": 1664455,
"n_tokens": 516572,
"n_chars": 1630297
},
"internlm_xcomposer_7b.cc100-es": {
"vocab_size": 103168,
"n_bytes": 1664455,
"n_tokens": 516572,
"n_chars": 1630297
},
"jamba_v0_1.cc100-es": {
"vocab_size": 65536,
"n_bytes": 1664455,
"n_tokens": 420883,
"n_chars": 1630297
},
"kplug.cc100-es": {
"vocab_size": 10261,
"n_bytes": 1664455,
"n_tokens": 704804,
"n_chars": 1630297
},
"llama.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"llama2.cc100-es": {
"vocab_size": 32001,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"llama3.cc100-es": {
"vocab_size": 128256,
"n_bytes": 1664455,
"n_tokens": 433289,
"n_chars": 1630297
},
"mistral_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"mixtral_8_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"mobilebert_uncased.cc100-es": {
"vocab_size": 30522,
"n_bytes": 1664455,
"n_tokens": 558042,
"n_chars": 1630297
},
"moss.cc100-es": {
"vocab_size": 106072,
"n_bytes": 1664455,
"n_tokens": 568539,
"n_chars": 1630297
},
"mt5_large.cc100-es": {
"vocab_size": 250100,
"n_bytes": 1664455,
"n_tokens": 472231,
"n_chars": 1630297
},
"olmo_7b.cc100-es": {
"vocab_size": 50280,
"n_bytes": 1664455,
"n_tokens": 494577,
"n_chars": 1630297
},
"orion_14b_chat.cc100-es": {
"vocab_size": 84608,
"n_bytes": 1664455,
"n_tokens": 628571,
"n_chars": 1630297
},
"phi_1.cc100-es": {
"vocab_size": 50295,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"phi_2.cc100-es": {
"vocab_size": 50295,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"phi_3_mini.cc100-es": {
"vocab_size": 32011,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"pko_t5_large.cc100-es": {
"vocab_size": 50358,
"n_bytes": 1664455,
"n_tokens": 1134056,
"n_chars": 1630297
},
"prompt_clue.cc100-es": {
"vocab_size": 32128,
"n_bytes": 1664455,
"n_tokens": 889530,
"n_chars": 1630297
},
"qwen1_5_14b_chat.cc100-es": {
"vocab_size": 151646,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"qwen_1_8b_chat.cc100-es": {
"vocab_size": 151851,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"qwen_72b_chat.cc100-es": {
"vocab_size": 151851,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"qwen_7b_chat.cc100-es": {
"vocab_size": 151851,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"roberta_chinese_clue.cc100-es": {
"vocab_size": 8021,
"n_bytes": 1664455,
"n_tokens": 866564,
"n_chars": 1630297
},
"skywork_13b_base.cc100-es": {
"vocab_size": 65519,
"n_bytes": 1664455,
"n_tokens": 492211,
"n_chars": 1630297
},
"skywork_13b_math.cc100-es": {
"vocab_size": 65519,
"n_bytes": 1664455,
"n_tokens": 492211,
"n_chars": 1630297
},
"solar_10_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"starchat_alpha.cc100-es": {
"vocab_size": 49156,
"n_bytes": 1664455,
"n_tokens": 530592,
"n_chars": 1630297
},
"switch_c_2048.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"t5_base.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"t5_large.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"t5_small.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"text_davinci_003.cc100-es": {
"vocab_size": 50281,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"tigerbot_13b_chat_v2.cc100-es": {
"vocab_size": 60515,
"n_bytes": 1664455,
"n_tokens": 482553,
"n_chars": 1630297
},
"tigerbot_70b_chat_v4_4k.cc100-es": {
"vocab_size": 65110,
"n_bytes": 1664455,
"n_tokens": 484099,
"n_chars": 1630297
},
"wizardcoder_15b_v1.cc100-es": {
"vocab_size": 49153,
"n_bytes": 1664455,
"n_tokens": 530592,
"n_chars": 1630297
},
"wizardcoder_python_7b_v1.cc100-es": {
"vocab_size": 32001,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"wizardlm_7b_v1.cc100-es": {
"vocab_size": 32001,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"wizardmath_70b_v1.cc100-es": {
"vocab_size": 32002,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"xlm_roberta.cc100-es": {
"vocab_size": 250002,
"n_bytes": 1664455,
"n_tokens": 399850,
"n_chars": 1630297
},
"yi_34b.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 577018,
"n_chars": 1630297
},
"yi_6b.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 577018,
"n_chars": 1630297
},
"yi_vl34b.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 576794,
"n_chars": 1630297
},
"zephyr_7b_beta.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"aya_101.cc100-fr": {
"vocab_size": 250100,
"n_bytes": 1540504,
"n_tokens": 470944,
"n_chars": 1484970
},
"baichuan.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 540430,
"n_chars": 1484970
},
"baichuan2.cc100-fr": {
"vocab_size": 125696,
"n_bytes": 1540504,
"n_tokens": 512313,
"n_chars": 1484970
},
"bert_base_cased.cc100-fr": {
"vocab_size": 28996,
"n_bytes": 1540504,
"n_tokens": 583210,
"n_chars": 1484970
},
"bert_base_chinese.cc100-fr": {
"vocab_size": 21128,
"n_bytes": 1540504,
"n_tokens": 553134,
"n_chars": 1484970
},
"bert_base_uncased.cc100-fr": {
"vocab_size": 30522,
"n_bytes": 1540504,
"n_tokens": 504075,
"n_chars": 1484970
},
"bloom.cc100-fr": {
"vocab_size": 250680,
"n_bytes": 1540504,
"n_tokens": 321639,
"n_chars": 1484970
},
"byt5_small.cc100-fr": {
"vocab_size": 384,
"n_bytes": 1540504,
"n_tokens": 1550504,
"n_chars": 1484970
},
"character_glm_6b.cc100-fr": {
"vocab_size": 64789,
"n_bytes": 1540504,
"n_tokens": 515052,
"n_chars": 1484970
},
"chatglm2_6b.cc100-fr": {
"vocab_size": 64787,
"n_bytes": 1540504,
"n_tokens": 515028,
"n_chars": 1484970
},
"chatglm3_6b.cc100-fr": {
"vocab_size": 64796,
"n_bytes": 1540504,
"n_tokens": 515052,
"n_chars": 1484970
},
"chatglm_6b.cc100-fr": {
"vocab_size": 150344,
"n_bytes": 1540504,
"n_tokens": 499261,
"n_chars": 1484970
},
"chatyuan_large_v2.cc100-fr": {
"vocab_size": 32128,
"n_bytes": 1540504,
"n_tokens": 822012,
"n_chars": 1484970
},
"chinese_llama.cc100-fr": {
"vocab_size": 49953,
"n_bytes": 1540504,
"n_tokens": 450352,
"n_chars": 1484970
},
"chinese_llama2.cc100-fr": {
"vocab_size": 55296,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"code_davinci_002.cc100-fr": {
"vocab_size": 50281,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"crystal_coder.cc100-fr": {
"vocab_size": 32022,
"n_bytes": 1540504,
"n_tokens": 447243,
"n_chars": 1484970
},
"dbrx_instruct.cc100-fr": {
"vocab_size": 100280,
"n_bytes": 1540504,
"n_tokens": 412685,
"n_chars": 1484970
},
"deepseek_coder_33b_instruct.cc100-fr": {
"vocab_size": 32022,
"n_bytes": 1540504,
"n_tokens": 537538,
"n_chars": 1484970
},
"deepseek_llm_7b_base.cc100-fr": {
"vocab_size": 100015,
"n_bytes": 1540504,
"n_tokens": 507693,
"n_chars": 1484970
},
"falcon_180b.cc100-fr": {
"vocab_size": 65024,
"n_bytes": 1540504,
"n_tokens": 407853,
"n_chars": 1484970
},
"falcon_7b.cc100-fr": {
"vocab_size": 65024,
"n_bytes": 1540504,
"n_tokens": 407853,
"n_chars": 1484970
},
"fastchat_t5_3b.cc100-fr": {
"vocab_size": 32110,
"n_bytes": 1540504,
"n_tokens": 717675,
"n_chars": 1484970
},
"flan_t5_base.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476135,
"n_chars": 1484970
},
"gemma_7b.cc100-fr": {
"vocab_size": 256000,
"n_bytes": 1540504,
"n_tokens": 374551,
"n_chars": 1484970
},
"gpt2.cc100-fr": {
"vocab_size": 50257,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"gpt2_chinese.cc100-fr": {
"vocab_size": 21128,
"n_bytes": 1540504,
"n_tokens": 636442,
"n_chars": 1484970
},
"gpt_35_turbo.cc100-fr": {
"vocab_size": 100277,
"n_bytes": 1540504,
"n_tokens": 412685,
"n_chars": 1484970
},
"gpt_4.cc100-fr": {
"vocab_size": 100277,
"n_bytes": 1540504,
"n_tokens": 412685,
"n_chars": 1484970
},
"gpt_nexo_20b.cc100-fr": {
"vocab_size": 50277,
"n_bytes": 1540504,
"n_tokens": 458961,
"n_chars": 1484970
},
"grok_1.cc100-fr": {
"vocab_size": 131072,
"n_bytes": 1540504,
"n_tokens": 428298,
"n_chars": 1484970
},
"internlm2_chat_7b.cc100-fr": {
"vocab_size": 92544,
"n_bytes": 1540504,
"n_tokens": 496629,
"n_chars": 1484970
},
"internlm2_math_7b.cc100-fr": {
"vocab_size": 92544,
"n_bytes": 1540504,
"n_tokens": 496629,
"n_chars": 1484970
},
"internlm_chat_7b.cc100-fr": {
"vocab_size": 103168,
"n_bytes": 1540504,
"n_tokens": 495045,
"n_chars": 1484970
},
"internlm_xcomposer_7b.cc100-fr": {
"vocab_size": 103168,
"n_bytes": 1540504,
"n_tokens": 495045,
"n_chars": 1484970
},
"jamba_v0_1.cc100-fr": {
"vocab_size": 65536,
"n_bytes": 1540504,
"n_tokens": 412899,
"n_chars": 1484970
},
"kplug.cc100-fr": {
"vocab_size": 10261,
"n_bytes": 1540504,
"n_tokens": 638107,
"n_chars": 1484970
},
"llama.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"llama2.cc100-fr": {
"vocab_size": 32001,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"llama3.cc100-fr": {
"vocab_size": 128256,
"n_bytes": 1540504,
"n_tokens": 412146,
"n_chars": 1484970
},
"mistral_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"mixtral_8_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"mobilebert_uncased.cc100-fr": {
"vocab_size": 30522,
"n_bytes": 1540504,
"n_tokens": 504075,
"n_chars": 1484970
},
"moss.cc100-fr": {
"vocab_size": 106072,
"n_bytes": 1540504,
"n_tokens": 515669,
"n_chars": 1484970
},
"mt5_large.cc100-fr": {
"vocab_size": 250100,
"n_bytes": 1540504,
"n_tokens": 470944,
"n_chars": 1484970
},
"olmo_7b.cc100-fr": {
"vocab_size": 50280,
"n_bytes": 1540504,
"n_tokens": 458961,
"n_chars": 1484970
},
"orion_14b_chat.cc100-fr": {
"vocab_size": 84608,
"n_bytes": 1540504,
"n_tokens": 564107,
"n_chars": 1484970
},
"phi_1.cc100-fr": {
"vocab_size": 50295,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"phi_2.cc100-fr": {
"vocab_size": 50295,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"phi_3_mini.cc100-fr": {
"vocab_size": 32011,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"pko_t5_large.cc100-fr": {
"vocab_size": 50358,
"n_bytes": 1540504,
"n_tokens": 1044665,
"n_chars": 1484970
},
"prompt_clue.cc100-fr": {
"vocab_size": 32128,
"n_bytes": 1540504,
"n_tokens": 822012,
"n_chars": 1484970
},
"qwen1_5_14b_chat.cc100-fr": {
"vocab_size": 151646,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"qwen_1_8b_chat.cc100-fr": {
"vocab_size": 151851,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"qwen_72b_chat.cc100-fr": {
"vocab_size": 151851,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"qwen_7b_chat.cc100-fr": {
"vocab_size": 151851,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"roberta_chinese_clue.cc100-fr": {
"vocab_size": 8021,
"n_bytes": 1540504,
"n_tokens": 787363,
"n_chars": 1484970
},
"skywork_13b_base.cc100-fr": {
"vocab_size": 65519,
"n_bytes": 1540504,
"n_tokens": 457233,
"n_chars": 1484970
},
"skywork_13b_math.cc100-fr": {
"vocab_size": 65519,
"n_bytes": 1540504,
"n_tokens": 457233,
"n_chars": 1484970
},
"solar_10_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"starchat_alpha.cc100-fr": {
"vocab_size": 49156,
"n_bytes": 1540504,
"n_tokens": 509958,
"n_chars": 1484970
},
"switch_c_2048.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"t5_base.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"t5_large.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"t5_small.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"text_davinci_003.cc100-fr": {
"vocab_size": 50281,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"tigerbot_13b_chat_v2.cc100-fr": {
"vocab_size": 60515,
"n_bytes": 1540504,
"n_tokens": 447372,
"n_chars": 1484970
},
"tigerbot_70b_chat_v4_4k.cc100-fr": {
"vocab_size": 65110,
"n_bytes": 1540504,
"n_tokens": 448567,
"n_chars": 1484970
},
"wizardcoder_15b_v1.cc100-fr": {
"vocab_size": 49153,
"n_bytes": 1540504,
"n_tokens": 509958,
"n_chars": 1484970
},
"wizardcoder_python_7b_v1.cc100-fr": {
"vocab_size": 32001,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"wizardlm_7b_v1.cc100-fr": {
"vocab_size": 32001,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"wizardmath_70b_v1.cc100-fr": {
"vocab_size": 32002,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"xlm_roberta.cc100-fr": {
"vocab_size": 250002,
"n_bytes": 1540504,
"n_tokens": 405041,
"n_chars": 1484970
},
"yi_34b.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 533106,
"n_chars": 1484970
},
"yi_6b.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 533106,
"n_chars": 1484970
},
"yi_vl34b.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 532288,
"n_chars": 1484970
},
"zephyr_7b_beta.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
}
}