diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index d6cac6e5968b2d90fcea19d0358da9f05dba3c91..727f25051d0fa9fc241f6dc1e11f8c3386cf97a9 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,10 @@ ---- -license: gemma ---- +--- +license: gemma +base_model: UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3 +base_model_relation: quantized +library_name: mlc-llm +pipeline_tag: text-generation + +--- + +4-bit [OmniQuant](https://arxiv.org/abs/2308.13137) quantized version of [Gemma-2-9B-It-SPPO-Iter3](https://huggingface.co/UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3). diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9e7e08f5c8e277065c653a7c72987d064f55208 --- /dev/null +++ b/config.json @@ -0,0 +1,5 @@ +{ + "quantization_config": { + "bits": 4 + } +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..5ea9cd4cda1ad3f9463e50bb22c385cedf998274 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,6425 @@ +{ + "metadata": { + "ParamSize": 507, + "ParamBytes": 4766153728.0, + "BitsPerParam": 4.068522985105678 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 458752000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 256000, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 458752000, + "byteOffset": 0 + } + ], + "md5sum": "7e4576c07fce908093414ffff240dcdd" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "8a734062e073c60f505e870cef9c9e64" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "30ff03988087e1a456ded808ca5c3b4d" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31911936, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 256000, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336000, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 14336000 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 14343168 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 15145984 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16751616 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16758784 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 16765952 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16773120 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 31453184 + } + ], + "md5sum": "e61687e23140c4be0eb472f8b8b46d06" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33266688, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 7340032 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7569408 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7576576 + } + ], + "md5sum": "e1e54aac747eab875148d4984ae3e402" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "fa9ae61c4e80101f2ac34f7c6cafb64f" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "3fe14ce3b96be382bcea844fcc79d6a9" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "b6c3759974f13f1c5810a8de93ff8cb3" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 27575296, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 802816 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2408448 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2415616 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2422784 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 2429952 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 17110016 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17568768 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 24908800 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 25145344 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 25948160 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27553792 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27560960 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27568128 + } + ], + "md5sum": "c61a981099af5c1c88c067339f9addb9" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "9828f6599fc633c572fc30cf2ba1fae4" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "e9606185e03eb1efead4c092df5539c0" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "a0450307675d4457191fb8a29e54d991" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "7336c7ed20a9ee95956ce177ec9f1c5c" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "118cc5d1a6fab3c53404fe4737dadf96" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "0be024d72cd013ccf872850173b5e4c6" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "2a6e654801f367267ffdddf2b7928b63" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "90c3a2dcabb63c0713d11e1e457ce114" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "85fdc885a64ae92e4b2fef970550b785" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "2385463efb3e6207a8d9c1c3221f846a" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "9bf221721002f35c2d045a797243de10" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "bb64e94e9ec18d3e3629667d418c0afc" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "236c0bd1a991714f0ef9401d4e2d4d55" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 24313856, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 22708224 + } + ], + "md5sum": "82d6d0465fa8507f12a596124bb9c102" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "b0aad1cbc9f37ef38df8d96949788d2c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "164e76e7d19882e05fdde875f297dc22" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "48116ee57504e6aac7abe8e69aab5f6c" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "471e68b3e7adaa36dafa06e8818a4c99" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "9b05f047d0ddc5750ebc89742ada351c" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "7fab3bc28f09cef709b61029cf8cc676" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "fb4acb9b8f0a9ccfdfb9db758eb4b4a2" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "c950b21690cc0df1fd0b387a5d53f070" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "b91f7e72a2926aed6ec8bd246fef8b51" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "c58d01a4f9930922ed1e0777e7bf9b89" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "de8c77d182370deb00bc978f3b4c8f36" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "f78a3d895982b4c1fc6adb564cfa1933" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "6f9880fcd353f146b53996d90a7947fa" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "4d0e5df2df24f327c0dffceb896f9cdc" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "23172be491de27f065dbd3e1f282078e" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f52d521b8e866d691388eaf3ff9a678f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "222f3c2eb37551edc7bd9fb2e38153db" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "5ea2d30fb237d6c60f500e7f2ffbf2fe" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d239076b99037e9aaf123264d5942da0" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "ae8cb7778a87321fa8c463238933aff4" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "5b08ec540a7c858aee330877683ea64a" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d6e08e56ca22f2f18a600c03eb3ed1c8" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "7bd707824d4db8ea33251e57e345cb1c" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "2e27724f5d53ab5b5bbce2a8d63289b6" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "3c4dad433112a5d0ba973ed359a4e929" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "dc6b386770b838444f42968492b5b07c" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "18620e716961655b3eec80e475107559" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f2ce1f6fa604f49bead2b24cff7029c9" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "ce76c88eeee3cefca1869fe638e802a3" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "112bb1b4b7085092dbcb34764492afb2" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + } + ], + "md5sum": "3f4bf5622b9ea9970c7fdc45d6e0cad4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ce248ae228d43dcce7b9337f96c6dc92" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "8a60dec7fb94b0ca9c12da6afb00d98a" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "f10d03cd1899cf74614a73bd38b892dc" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33546240, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15138816 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15145984 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15948800 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15955968 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15963136 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 15970304 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 15977472 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 16780288 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18385920 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18393088 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 18400256 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 18407424 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 33087488 + } + ], + "md5sum": "307526fa6a0cd170423fc5e0b31ec104" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33266688, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 7340032 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7569408 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7576576 + } + ], + "md5sum": "281927647b9017bd9fa752fcba708a1d" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "0e33d6b360aef369d199b7dfb71fac2b" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "5b3eb11fc57eebdc6a2407c0447390d9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "e09440c385bf81b124c52f817e749ce2" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 27575296, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 802816 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2408448 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2415616 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2422784 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 2429952 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 17110016 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17568768 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 24908800 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 25145344 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 25948160 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27553792 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27560960 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27568128 + } + ], + "md5sum": "558e33f3101ea1d125d466c49c182ed7" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 33266688, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 7340032 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 7569408 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 7576576 + } + ], + "md5sum": "6100b2db63bee85ccd58f01498d1f66a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "b398540c95c843a12c8a9583f44869df" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f38c79d6faefe4c5a8a405128727cf1a" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "3ab949aec9fcb01b3bb74c344408c3ea" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 27575296, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 802816 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2408448 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2415616 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 2422784 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 2429952 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 17110016 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17568768 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 24908800 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 25145344 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 25948160 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27553792 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27560960 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 27568128 + } + ], + "md5sum": "a9e87b93666ba9cec43b68110f2ee7b2" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "8dd7c24f724b36c6e64b7492a0712308" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "5b4564610b7879b8f4fd65f7fbbd3386" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "bfba488afce2abb1a9bed64fee346575" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "6429aa588539a2b7ad413e30b6e710ab" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "1d915fe2c351fc37b55681d485b15651" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "0d68dfb609c3dad2bc9bbd50ddc0289c" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "971834151df674343dc48e283e53305f" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "483df670721fb77b9af3d60f52931c87" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "6312634840a234492e54d846b2e928ac" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ed47fa87d985fd697c3e19d4ef74b218" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "9a081e64d6fc4dc68b49c4fc65175c73" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "3a74dfc7aa0ff4dad17e5ae8c6315860" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "6a98a3bdbec57bd0d35faf0cede43fe6" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "287aef2603ee42e50a8d423368b394a9" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "5d3024a58b3db130527e4a6d4dafcc0b" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "41f35587f245bf88d1450702024485d2" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "a34fb90fd72771132d38587d422e3f93" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "e6a7678bd7d9f782dc7d99c2a281a8fe" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "84fb15a8d067ef8f797fbd92103692c2" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "6e3ae4699e6aaa61b5bde0b1641b23ff" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "6290040a085ee489c75a7593b840ab69" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "e1fbd6a0f7ae693bdb17049dc3c83119" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "f063ac2632907471a3a74666b50d1494" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "d84521924e23857c659bcff3ffc0f841" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "8051beed966679d643cde5f3163698d9" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "2a7ba5b9541a1b9882bf8e788dbf9fa9" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "238e15de479ce1da5a6d01a5861dfb5e" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "a8d06b62f5bf25bd6af84c565646ae8a" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 24313856, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 22708224 + } + ], + "md5sum": "3f5b42df2f686edcae01c81c2a983319" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "d75981f9f983db23f433c80e9441df06" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "bd5928a6143314a18cdd0af8f2ff133b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "78a2a1ea3ebb0a15d19fb54be4b3b54a" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25976832, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23518208 + }, + { + "name": "model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23525376 + }, + { + "name": "model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23532544 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 23539712 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 23546880 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 24349696 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25955328 + }, + { + "name": "model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25962496 + }, + { + "name": "model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25969664 + } + ], + "md5sum": "9533b4d3e24e30eeb9eab11411e7c457" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "b9659841c71866463467e37b004a47e6" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "912b36721a6ede6d3e52eddd7bfda820" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "89296e94ab48388c32d91b63543300cf" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "94558a53493669990a2c22254cd3cb7e" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "08e20bd8ddb19bb9d7a15bb7d79ceeb1" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "3ff946ebea2c14a447ad697730167238" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "95c726681e4b0a132bfa8caa298e893d" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "df819cb146b9beda4009f2a3743a30e6" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "0aebc026d5c65e740aa5131798cd49e2" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "f50a2c3244567b148c0aecf50cf98638" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "632aa94539eefb8a7d2b7fa61b01ffb1" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "dba6b5156dfb98aaf4570b545d28d1e5" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "bfd68cf560e6c68b08f53f10b262d396" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "656089d087a94655cd36cdd5ddfcdef1" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "85476d61319e8be2410b8bcbb74cf89e" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "415615c4203cb7b11b2575fe2b4c6b5f" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "a9ba0f3157f19c9a3992d50a58472203" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "1882f5f2e1a1cb37f82ede0c561f7217" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "ed1ae69e412681d303284218956721e2" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "f76e7ba87a65cf9536c19ee7cc167dc8" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "0ef28051d6f025660e808cfd1bd55610" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 25690112, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3584, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25690112, + "byteOffset": 0 + } + ], + "md5sum": "fba921ebe23a12c235cedcab48554fe9" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 51380224, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 51380224, + "byteOffset": 0 + } + ], + "md5sum": "44baa194024f6e33d812f78c541a18b6" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 25145344, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 3584, + 112 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 802816, + "byteOffset": 22715392 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1605632, + "byteOffset": 23518208 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25123840 + }, + { + "name": "model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25131008 + }, + { + "name": "model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 25138176 + } + ], + "md5sum": "276e3f6ce31b75ac3ae4276403e15afe" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 22715392, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 8192, + 448 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 8192, + 28 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 458752, + "byteOffset": 14680064 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 3584, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 15138816 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 3584, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 229376, + "byteOffset": 22478848 + }, + { + "name": "model.norm.weight", + "shape": [ + 3584 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7168, + "byteOffset": 22708224 + } + ], + "md5sum": "ca54e73e86e70c9c1ce3d5174d86c928" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..52bc08600ac30a02901b858f80df0e90cc4ba13e --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a02dd3c9fabfe337c781e8a1eb3f2aa1f67104a7c51383342b772d96fd274f58 +size 458752000 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..74382b32d118c41a48720b85342326b12826b51b --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d17aeca898300a352364fcaf1b58317b3e9ad4d2d7b8941ad1a6bbe333236b +size 25690112 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2f079a3c4b049f40861c8bf08f0f1cde86632c3 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b35c08129c54bce1480bedfb04aec9f7501c7f07d5ba2ce2f349d8e73220bf1 +size 51380224 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffde11644b4d0f118d2f4e55390c371685015164 --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f630d6bc927c2fd3988de481c107af3e76de178b4f1051bdff620230485472 +size 25976832 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..674930d625338aea2f3aa1b5943dcb4b4c5c523a --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e964476c926dc3010c7328d565b23d51da5a429adad232899ad5b1797785a8c7 +size 25690112 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b242472429ac0d5a80f586ffd07272890d7f577 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710f49148e2852a0af5713a6333e4f507dca6450b93df311ff7a31309d86be5f +size 51380224 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab6994d6e4f7d5092f7e67794ce1bafcbaa8bcf5 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a302e8509193144cf91c9cac271c2f61c38467ebcd397286627236a45f3769 +size 25145344 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..59e5b503dec6889c175949630dab87a07d8ae804 --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdeb8d5e97487ecf4b8fd9561a6b7e0d6480cb36ccda4dcc5cb286d41b195429 +size 25690112 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..e250704be1d0fbf170d1a4841fc94051ea3ebaea --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45927885ea35a1dd473f7d6f24faccafa98439e9640bcf12eea43eab01c8963 +size 51380224 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..6be8644f2ed282e58d0c495748866dfbc4adef93 --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628ca72f7b53388aa0c386b32148b9c95dba225a8e6856cb74e5c6af2831f907 +size 25145344 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c652240a48e4b0e37c52bc330fa03c414d47024 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d5bc5551fa07cfc9c94fbc07c0614d8b99c30d138d4354446170514aaecf98 +size 25690112 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..96e52cce7066a7494db11833fa192793964e799e --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f3c3f4b98c475790c4707d17ce726c0a78818a3f8cec510a2d2ecf174902bc +size 51380224 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..5bd8ab38d7231b53538c0204528bff276b70f2b5 --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d296ea050334c83312fe0845795df67cebb02b5fcf09d2bb6c3cb606347f5a37 +size 25145344 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d034337f77514177e483ce6ddeaa36baf36fdb9 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e5abde0d34c0d8f4ef53c2fa5d560a799073818be9ee226e1c0e3de9fc212c1 +size 25145344 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..9010fd5386b818227052c85c2659408475481a66 --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d39dc8680363227fb6bf95ce8af3903189552c1fda7e45800b6ec708d1e32d7 +size 25690112 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..52e2539648bc2983a835f30fb8c5bb9e09b762b4 --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b537b12cafa6bc4186e2ae7f8b7f015f48f610aee58f9056faf717e8e1584175 +size 51380224 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..b3fc12119a2d50c699668fc99beef3608eb2788c --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a7e9052ecf8701e154265b47470d5e3de52e357990385ebc5119b013af98774 +size 25145344 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..65f141b748113702a7f2d5806c99ee917916aaf1 --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00575e235f4cdac2fba9b530da3420ea31ced1606b4cc4d4cb462d2f6f86c97b +size 25690112 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..f8fd9f2eceff539aaf6b0b24fb9ae407bc999c6c --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58b1f8618a62bc57eb151a9064caf516e50c33061b14fee338862932d0e2d3f +size 51380224 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba80352b8dcececb8ac9c906315f9f03af95b5c2 --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb901acf28c240811aa30c4dd7e4175f9b0aec44e55aff9bccdcc1d33df4e85b +size 25145344 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..822e62c5cb2c34c628cba154ccc9e9dc531ede08 --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41f8a03a913f3435722be7358f5aa7979688b7a4e4f2e748fb0df2abaf41d53 +size 25690112 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4bcb8d321931f9d8bb1b071678e4c2f55102f3b --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb3201ba495081c47c21e2649bb9531d7d08db1e425af770d2436e2e570bd2c2 +size 51380224 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..e77408a3ce74133a663be2c615f76cb221578082 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6139548ee80918719cba1cda05df72aa7c9a802d210c633ad71ae936870f63 +size 25145344 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..172b0acbdff80bb9a7a9f14dd9911d366c74d060 --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e34ea34d35a8df87f3070ceb4ea8941275efe8935d14f6fec89e18b7d2b465ef +size 25690112 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..5bca9b515ef91f1942676cd25af1428d3d9cc463 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:951926ba117d1399c701fe8d480a383865e56de00408d763fdcc974abcac9e37 +size 25690112 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..08a522f1360e3a2351b5573aa8a549946cd1152b --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8632ae03e8211dd434178e0b36fdb86b8745e769b6568fd762a6199218039595 +size 51380224 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e9bbf82d569c1a17ba7db993cba99e318f2012b --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff96ce051f27d5f404919916ba2046729ac0a3061e30aa5119529a08f0dfe01 +size 25145344 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b070be0385151f4d924c1999829a5c9b43471a0 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c4fae65f66ef60eff452e56e754cca850f336d4aefa43bc622328db1f62715 +size 25690112 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1e12b21172a51a7f0c5012a2fdc9f36220da6cb --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ad43cc6831dbb0f0c38f2a53d4a42609c699a7908f03c01beca3febbcc257c +size 51380224 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..9438cec5231596a087b372735c40777544c85146 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c0aa275ede1c1949e237719e00c7154317cd6bb78c2b38cf5aff58bc125571 +size 25145344 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a1e71645c4955d2c36997a154e421c7d5aa7680 --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2bc120481e5f1fed9a3ca33a360982f7d147996934c456c0c3e657145f19bd1 +size 22715392 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c454311b61b76fd9b6b0ec7e350a3a231ad81b4 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed713e10d9ec06abf0580df514f53ffe4f71e9a273cd11abc912c5f4710e3add +size 51380224 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8fa1009f5554beab39962729f7d804ff8b1aa51 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a66e86b386f4a84e62fc92435a4e30598758b518753edd7ea2b8ff579108769 +size 25145344 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..f197f74e4de8f06244e6418158505a02048ac1e9 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90fa610107da563a92d0d54c29697cbb8296cdf9c6e268661a0e28ef6620de0 +size 25690112 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c974ff6b7c0407727a21b2716eb65f7b855c4b3 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53fd0bac0de38404c3674c3a29f5c48b226b145ad845246f8e0aca36f01caf8d +size 51380224 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..a63376d7cf60bbc69e7ec9aba96d4f4198a0c91e --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f39c08240600713759ae9a9db468071aaf8b13b4c1315bde265a6f2650ead81c +size 25145344 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..06b1f5244ff87bd1e815e5b24bf9383f926f4cbe --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84384534021d973c2f5d64a5f38ddc76811dd8c20949c2210d1bfc6ad3e61579 +size 25690112 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..43b9c0811636f5a124bbd323b00c5a1bbbe4ea0a --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2093b6994fc9fd26b5d737d7f581aa0a89a42f740d6ec95b2eb54cc281fe34ca +size 51380224 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..803ec6dd7d71a3b21b3a4b5e7a3302a9b95b6589 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0eb1d7f32e0250c0e33324223dd4afe1774354ad2e468513dae881d3b5af760 +size 51380224 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..52ca54587ea5e9732b57dc23185db58043c0712e --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b3071612c5d42131b29bc1e60e1612f8b13db96b40b270bbafb2b94f60aa3c +size 25145344 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc14caa7d70c54f9b3fc722788e931a4c3a7ca3d --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7ee1045072eda698cdda8b8fba34b9f02667e9a9b698416d09adc4397fd28d3 +size 51380224 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f874f21d7e680cd94cae90da2f77abc9bde3f65 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ddf6974b4c585573fd6d0f5cbc962975d069a9d553417422b24af5a13f8cb15 +size 24313856 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..c930bfe13b6699f50d061785444fb6220e8255d6 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd65b1b2dec5f9cfaf16c03ad7b57e0ea3ee2ec11ceefdc1330eca14ca08880 +size 25690112 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d677832ed0792efa569646e716ce28a6fe9cace --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70eaeaac70dff785550642269612ce666e0cae812251b093a50e9ebf671a7699 +size 51380224 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..8adbc24e86276bd0dac4e2fb9bb00777bffff44d --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346013c078a26f1f162a4be246ad3dd58305eff8eaeae9cf7447c5bc4e2de64d +size 25145344 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2261adeb8a3133cdec53c414489a5985dc341f8 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58366d523d0a5c9ce6e4f884d0f9a6114f2b99aa369159bae4145edec797f7c1 +size 25690112 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..45a9d51c42589f2263d96d59de1d103b1ad0733d --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c53440c39db0622f20977cf25b6b63bc8378b32072324c71bf51a1cbe5b4a9 +size 51380224 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8ea43d88eec752f5ba549f598171171501acc14 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d205e8c3081d6284601fffb7a3e565a5144cb3a326b1464e0c6bebe85a0625 +size 25145344 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..c0539a38b2861be31890e004b4a4cd0a6a7e99c4 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef760daa2e3fe583d7fc2595e16d3cc4fb5c827a1ed6955203f61afcabe1a57 +size 25690112 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..470d4ae28a1511c4fb63b2c628aea5aee04664b6 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9676ce1e1c48e5c41de2ef543b99587acea557ddf359a156e4fbadf848af02bd +size 31911936 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a7073b7ac88380568ae5809697d5ee7e26c0994 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cbf4788900325fcfb8790247b43ccc3bc539cacc8593b45bad853de6a6852e6 +size 51380224 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..7aaf0e8da94b1a3eed365c162c76551c56e2bfeb --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a4a3536d1e6e476f7a10366874648ff2cef4329113c7aea16683cd71954230f +size 25145344 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..5097df173d4f72336b8ac2f13abc62ac31046d72 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26294c5534ec1021dfb95235c5c6ea82675278f035a3125b83db01c8458f9865 +size 25690112 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..1729b830500c35c70967b3634110182541126d7c --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed51163528fcc1e76d1552a48c4e56918c2e8737a73073fd712fc3121d3f3e4 +size 51380224 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..d8b4ec0865d061a5d3331d3aaec7198b47635c20 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30876745547da0ff75c77db16a415bdfbd90d816dac546ebe0310a273ddc1474 +size 25145344 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..7385d4b0aeb453da1e35f1ececc46d055999234e --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411a580356b61f0e120c8f870be7ca67998943bb08f46ed0e727098d207faf9a +size 25690112 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..09bd2e569690f2f854cdcdc07c58a526e41fdc9e --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bef2d3c535066ffbdb2244246476f759b961a0abc9bc2c6f923bbd9d7fc536e +size 51380224 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a504da970b8443198751dee8f3d9b15343c027e --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed879b4834de479ed902e0c117f71bfa7ec996418662e65230b4959842cbb55 +size 25145344 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd48be386143b519707062597cb3bc1353544c85 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e175375dcb797f25821e7a45d9117dca1c473a870ac1388326bbc3f3d85c5a +size 25690112 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d72459729868538c8e725d545fc53b270a7bb78 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:838897e1e94ac4f0542652bb6fdb0ca8e50eb2471734ae9419097f106d0f7e60 +size 51380224 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c99f5dd832a5fa6bf21e97577f860063df715dd --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82823e8b21775613672dba5008d233e4924d713d295ed8857efa70040b111d63 +size 33266688 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..b5fe6d95506b0727c5f5f7055da7f71fa3f529cf --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e387da0c9574c415dc3c0344081d4d0ff873327ba9acae87c2da4d67d9b55d6e +size 25145344 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..f2f433cf54f2795dab41469160f5931b684a5a0f --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f4fdf2eb05fb9059060c4e5baca4e657eaa864ad66c972682ab57cc42b4703 +size 25690112 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..227f88c44c964e3309bc6a8897cef5f906693740 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c377207f3616f177f138a774e58c43d2d8974b79555c90f2879bf6f766b79c5 +size 51380224 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..5834eaa96f1c325e03ef17cba60e8387111293d2 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a2f516b013b22169ff34bb82ab34356d38269688fe7f7336a909cdd99d79ce +size 25145344 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..81d8bf9ee18e39708439893249018638dabbbe30 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885a96224d652617f8b09b6f536afe59d94b9e49fa9277da43d990ee6a81b3d1 +size 25690112 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..461a5727de7d5ce743a9e5811bad1b2ce9b41230 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba015b6c459fd18296ced75d2cf9ab252f9e2223653c520a5d580a309d86dd5 +size 51380224 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac2edf328b8da0b75fadf5a619af48430ab742d2 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65e0829baed8f87b547a40b36184c96538bf3533ddee5fe78a82ac0cd57f2aec +size 25145344 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..0214fd8816ec127997a7634871a5303021b1c03b --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f5b15d6bef8d7c37886e0e41977a2a7980f0e2f6e5a92a3ec89d540eed9e02f +size 25690112 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..088c78c47888eff69ec8adc16abee19310623845 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b299347ddaa00240ab03a2b720c4afa37f35d26d4cd56255b2ca1a009e6856 +size 51380224 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..5edfff6690aa41537be4a2c818c902db6f3a7692 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5490da0612ae4135fdd1f21c38c6dd4d033ed49519d9174963a1ffb9957033 +size 25145344 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..9258dc30f071e646538d702a03dac852627f8868 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54d43e75670e32125b5f60cfd0d4389b18dc57cd8167daa7a8aef8412d25314 +size 51380224 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9b781f14bc1b9ed8fd7cdbf4d3962f4f5f0165c --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52d21bc8738e63846a661494d851972b301bc8ecbb906e5b731312f535a4068a +size 25690112 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..309689c6d05612825e172d91ccb21a39e6cc711f --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7839039811a5923c53078dbc126281b8e6c7872b1eec595ad6f5d076222bcc +size 51380224 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb91c32b87ca91fb05c62531c7bd10efa889aa0d --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3119069102b509ee806431a4e025fb2cf9eaccc3b38de71b3b856dd89489e04 +size 25145344 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f9070b0c460f2341769b57af73c3d3ab0176e0a --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da93e85d1b6b5e0e2138a8f35b772abdd9aff02e811dc46bb14082e135e2f027 +size 22708224 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..d11ab0d263371380d5d0bf55ccd700f195e07464 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308b18d7c6459a6f63b3141a8a38f9fb1284fdb7746211e667a7fd393fc3b992 +size 25690112 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..690321c2648cbceab161d01bd34529f868f400fd --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ed7091e2d3f9cdb34600f8e07fe0948514cf49e5605b5fd48be1fc9afe23c7d +size 25690112 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..604ae2fc3f2079e4fadf0a5a9e39f737cd7daa09 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9edcff7cfeb39d0f4c3a5d6210afd48c5ffd0e16ba694ac7242e1bacf7af73f +size 51380224 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..73c5f4e758a885f9335a2770e49285f65da43bba --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4543e4d60a7984ab6d678fb311298e6dae80e1a2ce7f3812ca01ca7bd449617a +size 33546240 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..1163834d09eb45f7e6a02149c1119e25adbbe037 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f925343f55081d63688bb1d5a99c26943ac51272be8bf3d2ce23249aee9f801 +size 33266688 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..35c6e0b8d2ef4d75a2ae9ed92ee5fc432ad525b4 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fdddc63b12ebf274538d3de18609c2b937f95a772ac31f59cf971d0ed52ec11 +size 51380224 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..55c9909f284ffaa6fb57f2b00aa67c2a968ef5ae --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c44efa419b1115a7d85ff54af55e851b158e857055c35af35f2e285aca7d9f5 +size 25690112 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd310c21d74f926511094a489048d3a1c14f9b5d --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888050617718077f9ed0c77dcf84fa273583b5d27cd0cb478e8e22b9dcab89de +size 25690112 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f014344b70f276856cb9aaf0f34158667386fac --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ac72f036d851a06754ce50a924b78fed75cc7e687928c919e528eacd3551206 +size 51380224 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..c97090f1e6640e2f1f8b0377c8041b495a34629a --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb4fddade7ebe888e99314123e93be3bbe058121f5e18628059909d1878b1923 +size 27575296 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..91bae79c719ed509af9c2025d99463214daefa8d --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7602f8d47aedaad0130efd47f017777172bb9bacad79c0a3c4c81c4425abeef8 +size 33266688 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..419b04d25dd22f1197ab9710cac10787ded8b90d --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fceada3207ff134469385c5584923acf0087b0654f3aacd1a1b22c170f0f5a63 +size 51380224 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..afb5188d76c6cc70fae794f5ab7079bcc227da85 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c41f8148fd8ea4ed39774e2e893414a2a8f8a2d942d2ec7bc52fe910938a205 +size 25690112 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..34b8a893abc41df15fe714ec2d600f8b46290b2a --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae0f259fb528665ad8dee7e72d7c02806d58dbbd40976e723449e34bb616e45 +size 51380224 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..c17994dfda705f3a9c379e1812079e20bb293a00 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae3fda448ab5e8767b49d3c31ec14aefc59703a63435853b369edb7459e130d +size 27575296 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1149ed94cf2b81db9ba44eb4792adda6de8a8c2 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c9c6b47fb58642981f0bbb68afb845aaa02dd2c5aeda9535ac52e0896baa9a2 +size 25690112 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..6313b8c335f7a02be142f71bc1982da11d9e6690 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:962968a07151d02b6d2d22ba7cc85e4bd25c1464ef371e8fbe11dbd13acf251d +size 51380224 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e9a0b7ecd525f266d64b9c8f01fb04f502efa41 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a6d196b274de673f7050065aab632b912f2c0355f8892991847690c94b0337 +size 51380224 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d35b293c42998cdb2dfed131fec9e0ca8bafdfd --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e318ce96dd38de4403692319c9e8179680dd30f16ecec3bacdc94ebf69842971 +size 25145344 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..6967a28653309828d643d82160579052df94370c --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae0aa2311ccc0b96215723edaacc9f0bfe1bc937c80ba744a6f28a8ee5ed9b6 +size 25690112 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..33d4a11f01916a33dce027205395c2c7c66efd27 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f312fe05357b33829ff9210c0263f6ab6424985dc081338200c092934c322c28 +size 51380224 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec3d0290a669546ff373b3f685e32511520e42f6 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ca924af9fe52157a7db41be0aee272a53958840b25b36a054cec80f2a54d35 +size 25145344 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..f135aef8bcc62e2f92703cf1189da942a24bf682 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad899a45773b6fa437f3a66266e208a9407726ab84391027a476dc3db94cfd9 +size 25690112 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..7474112b84abacb2f6770f2244f4c0d246d0722b --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbfe2b9346ebc941b7c0b514e6634fe24b79a4465779378d658a2914f608a5cf +size 51380224 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..52f0e9f500b1bd75ed00bff4b0a277073a6ae1bf --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c7a3195fe3996f7ca250b2eb4648f9acda402cb46291ba36a750f6a9094cc7 +size 25145344 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8ce836c7c3713c9029d75dd828ff8f0061a8ef9 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d185028bdbc6bbdfe008513f78be76106c3194fe745b859278c55c2933259de6 +size 25690112 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b5acc61eaf55ea9ad48648b799a83b5b591dd8e --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1858df8ca00bc0b425f800d0d855e7e7bb8710fc9f7a3f9fdabbb7f614609ce5 +size 51380224 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f6b660554216ea15bd823f4bb7399463c673c1f --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086e4d480de7e36a94d238493fc3cf3572cd06e1af3186b72280bb7a5b12127b +size 25145344 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..b53a5bcbaea9ddaa055dfea2e3b4736ce7e54155 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a78d4960a739a53a253826d9bc2ac17914832001340fcab686f27800216826e +size 27575296 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc4cc73d39312bdb5b83788ab5068bfe96b774c9 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e71f134f077a72673fccbff189d744367981f9e652cb296f9b6f034e46e083 +size 25690112 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..03f58770b9d72a79abae5a801c4a09ed1b036b05 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cbaea424f341ed0d0e82179813ed5045994b8082a7db8a255b4585afb6eb47 +size 51380224 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1e9fdbdd8e59cad0bfaac31909781b9e00a3d88 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcdad44059a0f465927e4ca3e2e2ac511df9a524d82414fc10ba44645632c027 +size 25145344 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cc8695e7f984a56ec4d1ec43b582c3eac9dd0a1 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c374e39810221d52d52dcc5c163246118273c463a6190f3e74f91b146d391b4a +size 25690112 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b3a6a8ce34804116f3c6ee415964ffacf5af686 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8598efd911bbc65ace2d620718a0d2e1ef8742c5bc9ad99e8c16ab037b96ffa5 +size 51380224 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d5b0cfa47b993a72bb3db854e230366886b9dee --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2b8b9e60bc30e864a88ddc67c8c37d884d516966bd82023cea125b2fb756ba1 +size 25145344 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..98b5332119794fa60fb8ae5f8c05e30d7c005303 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74e1fbcc578fa0b63d1966b816bbe6af929f2f435b0d4083f7718f355d90adb +size 25690112 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..f31e7a96283c2a7f28d3041bc3809dcf0d49e61d --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0435ca2cacd1da45d5c877a1a3bb520edf092b2eae422aa4e6ab14f77d254d12 +size 51380224 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad6ace30902e3915f9bfb3a94793517552261f29 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a103f16ff26d2680818a6f07032f066efb00d9bc960b49d55271b9eadfda45bd +size 25145344 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..d1160e88105a7bdd484bd4cdcceec5926a7c8fec --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741dcd37c4c664fb54d31e89b9e788492cdc4c7bcd564772609b6b3d087610b5 +size 25690112 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..532014fe997020684673f655b50ea6deed00612e --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0957f39a0004e3eb6cc5bf56eafa47fd8ae3f6a2cd6e9f6f15764c3dd765c637 +size 25690112 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..22a5cc5bad874c3c9e1f1799d53686ad8291c0c2 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffe38c9618fe193edb1a3413980c3246164b8019b7710aa048e6e8a86ec6058e +size 51380224 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..0870901132402c4d0eb8bd7b138b586e4e5e48e8 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cbb934b3a2216e8a0d43ca99b61047e3e2388d61ab551d0b99d16a862cd96ec +size 25145344 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e338447e93063691f606162743f6cee244b46fe --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae6a7cf89b9ae8b9ce37d9f303772e28556fdfc005f5c0e9bb0835ec7b491bc +size 25690112 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..644cac4e4a7c6f5c94fab2696418c2c40acedbaa --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2126828ac7c1037f4f0f2338b4cf7f4982820f28bd8bdcf5c482725eb661f45 +size 51380224 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..10a9b9ee461345517982d0ac6ddfca90d081d1f1 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678a150496019f81a6b08c917e75e72851613c707ac9f5c9f6264dd45d76dc59 +size 25145344 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..f698663985d4dd4a1e702f466d5d6606d5b35856 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2ff0fbb5bcccadb3d45e9bf3ab596d7a0b46adfceaa132ef2c81a3f3d4bb18 +size 51380224 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b876b4e5d05da2e0f3b9d5e736a2fbbd73b5a22 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d8afb7f31886d07eb8714acb3499d94bee45dde8df328a96de9214f76d3266 +size 24313856 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..2500ce1b0b19b8c493915fcb310679e6e0889054 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b932b193c959db9137ef05e97c7cc6ea28557572803e1efc5033b92bc19a14b +size 25690112 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..22d2a5b15738cd37154b75523fd6ebd3a9dc52b3 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8a0e5aa9a6623e4538b40df2309494009b9947fd5928864aaa80b837948477 +size 25690112 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1cf17d6674491c60e211614b028615610b082b0 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5b6c72365df562a5c3fadb71582d4e03a9af17995781226c58531ba8565d08 +size 51380224 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..077ef84e1cf03491fb2a958f92b55b141900862c --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,49 @@ +{ + "model_type": "gemma2", + "quantization": "w4a16g128asym", + "model_config": { + "hidden_size": 3584, + "hidden_act": "gelu_pytorch_tanh", + "intermediate_size": 14336, + "attention_bias": false, + "num_attention_heads": 16, + "num_key_value_heads": 8, + "head_dim": 256, + "num_hidden_layers": 42, + "rms_norm_eps": 1e-06, + "vocab_size": 256000, + "position_embedding_base": 10000.0, + "context_window_size": 4096, + "prefill_chunk_size": 128, + "tensor_parallel_shards": 1, + "max_batch_size": 80, + "attn_logit_softcapping": 50.0, + "final_logit_softcapping": 30.0, + "query_pre_attn_scalar": 224, + "sliding_window": 4096 + }, + "vocab_size": 256000, + "context_window_size": 4096, + "sliding_window_size": -1, + "prefill_chunk_size": 128, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.95, + "conv_template": "gemma_instruction", + "pad_token_id": 0, + "bos_token_id": 2, + "eos_token_id": 1, + "tokenizer_files": [ + "tokenizer.model", + "tokenizer_config.json", + "tokenizer.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e7caf9f7f6e58c4bd4563dc5b2fd2e596f0ad070 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15eb04bc5ad609fb26533e8525302c5640a945e5f67f65b7c849900acda7d99 +size 17518497 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..de123bcd38f7dceb1f8daffa815b43615771b02b --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,1758 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": false, + "model_max_length": 2048, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}