diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,12543 @@ +{ + "metadata": { + "ParamSize": 805, + "ParamBytes": 43115020288.0, + "BitsPerParam": 4.836979883651508 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "8c6f3b41b1b28cc6afa1a9d0d26364cd" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1d96b02d236d1b7a3ce6185c5624bf93" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ab0346e6195e8c6030caf4a1f7da930b" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "adab0ad459aa3b521d3e0e447674d3d1" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3f40a48b429063ae0e88125aa1446292" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "08e6db609c0ddb6338a32f5f34e71b39" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "24e7ce662f0ad8faafee7a61e3714992" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "617b8ea8e8dde71e6ae3323393401ada" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5f5c7f1eb915b1afcefd8a2cf583cfea" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a208e81f586828ebe64c6f7010d24e65" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c48ad44ee50f5479fed4ba3019ac4787" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "5422d877c955c22c6988d1971c66c8bc" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 18907136 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "ce87a147e47a95338f565168ba276c5b" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "beb4c0d50e1f56407be41de59e1de63b" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "aa85b854559e14ad67277b822a7fa701" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ecea057b53141db3a1ac3c018440411b" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4d71ebc00fac3e8fba52bbe0bb0a5bbe" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "34f94cb5ccbcc5e528424adfdeb646ee" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cf33be06cffd87aaa989e1a640a893bc" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ea089a7579e5bc10ff5e3cb841d6abd1" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "54f8f55f4713312794d3255bb22e8d6f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2b672e66c4421f0b4598584da425eaf6" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "895f1b6fe1a88c0aa97a61da0ab1b881" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1c3e81a1e6ff85886642f37a6933b737" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "58156a4ed440a54e75765d8b465f8e31" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f24b664f8f9182d52d17497f9d88cdb4" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9364de4d4816eae2e2dd30d961bf7533" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "eb4ca6c8ee0f10a3620d7f3662aba858" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "17a246eb547f223aa5400fde3fd13598" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "27067abd0cdde921666a2205292a0e3a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6bd6ec07e0369808892f8d70f5872a27" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1bcad4983027ac8722f185aa6f62612d" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0d98028a5d64c34adc7faa7b29397e41" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28344320, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28327936 + } + ], + "md5sum": "f88775d54a9d51ade1ae95af52a89a9a" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "50136a34bb5a0c99cb40031d77716f40" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "396dd0c7c5577a53437de690151f9949" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "50fcf97d69b1f5973eb063c8344e1c6e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "80aedabcae603b6297c003d76e666fb5" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4b74ebe54da0884088bb593e1abae65f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "55afb77889d3e900c87322047f8c9885" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "051f6fa72c57d0d2d6a48fa5da6b84ee" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f617a26b8dea577eef7f8792c31fbc0d" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9bd74a2e91f677fe3df14503dd860614" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0fe93139141f81a2427ded30b3a6285b" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "76b91c64244b44eaee6913db7688fad4" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "73f558e35814078b191c861aa5608ec3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "1795e7574256b5dd5cd9e68878cc8839" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3948bd14cf1ae987f09a2e3940733f8f" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "95b0ffd66031fc9e19a7f1125a3765eb" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6b0175e69f266784210a14820ca3e56c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4a0b5c690ee8b6c3ee296d069e62797b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ecd6a6879900b81a7d6886cd7adbfc9e" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "3d1d231e6f43f8beba89cf59f1e761ca" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "65d3d386ead57b246ad9cde8603247a4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "10a4bfd3f81dfce398b300f03f2fe43a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7c418268951613edc8a44d49a16f4a35" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6ae4f4220893c38daf51d9d5f041a000" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e4779992ef66795c3af0425042fa4ea6" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "631b008d984e67212b1c45ba6ac45e3e" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "100c3d3e7567a6eeb2c8681272948b5c" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0d8769ff54cf3fccfff4c3b4faf2e651" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "38b49f0851bf5a5b0fdee2306811ac94" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c34b11c4e75dd28a8fb60fbd74bc7f83" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "67b5648a311ea30b5e71af20a9a3630c" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "9b791f9cb774c0d743e7d063fc66ee87" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "04f7faa67fd81b4a23569dfd366e68b4" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1eacc71d61f9ae99d8607a173421fe73" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d75e32fc545cd1438197175ad26a8a34" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "28841495bbdd5dad78aae028979a530e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bb9365a01219283e6c6e4170a3a8df6b" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "8a32c9c1cf33adfbd586e24726bf67af" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "5faa8e8ac2bef2104a73d1a9091c4cee" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ae5e2ea8999ee99e64c9b26f9e1f408b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3316150b2f925eb2fb2fa7071da4c34e" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "532a3f8e8291ac500bf9f4081bc05b88" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9401ab6403a01ae14aceaa7a2a6a81fc" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f507cd1ab12fa8655470b965cb0b999f" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8a54ae84c5304d93049bb8fd82bdaaf0" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f6208df427df52425fd3295aa964a507" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "36bec5b69dc3e16f2c1055cbc3ea9aab" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1636ba5307e970562ff5e68b6d94ee8b" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "21e75d4e7b6ddfb22f87790e2db195a4" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "cc6f203dfb78d9cc0ad3814e772357a7" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "69af3cec864bac3074165724b40da3c0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "64a5c2121a3a6db195dd83bdc834f2fd" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1aa324897adb889f330b677d07e1b84c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8775ffffe8b63380e179efcf3f35a1a0" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8907b6497e5b53c35750cde6397b2892" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0d8948ad24b5270499a1cd0035ef4f38" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "12916fada5f96e3983c377ade09b8176" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5c49c15f8092278f14154fa637d080c9" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "699c9d99ad60de18bf28a7fdaf3dc05f" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4a72d6ac4b8f96e0722aadac0005cd62" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ee0e6714d287adddf920e982f8e72721" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c4153cf12c1bcb2dedc0bd1531b4b74c" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "f8e6cf951dafe9cb5fc034c0734075b2" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b2811baf32f6cd12b313196c6143638a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c8c043ac00880e7a9cf768f004c2822b" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "00a00c3fae3fa0097da0933fe0a6be7c" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "46f33ba2eff077cf1827522aa8265553" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "ecdfe19e823a388eb3bf2c2f0a7f1fb9" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c471ad3eb48dfecc39d3456de0105b70" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6c1a310eef9b960558e8221a7ac974c2" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5c9e6b4fa7f048e280c434c93565bc59" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "db28afc3b3615bc4f4a4590fbc16669d" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "99fd058f5d35d77c9b469df9a4028d7d" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "22f932b8196c10dc284b9f74f2312b94" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "06cf6df8ac81765ea7d0f7e8d3950302" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6c169245d1ba08c5ad03235a6f68ba5a" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "fe96483940ffa182c05e5dbe8a07a59c" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "23a5a17f070c81ce6703d55d6d5472fe" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "67c03a22cfbed47a876a15b6afb2af2b" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0ff7cb39263dfeee1572bb3eeb70eb2d" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "9b813225e70f20579b88a7d0cd422054" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "dabf4d95f75c9a84bcd7689a72567c3a" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d5e05cb3e4e80078476122204ed50aba" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "30866583d53cd0f444ffe0d6e4ff1709" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dfedfc62c65dc47e6074561e860e548c" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "d8035db7c51f9e663ad05e0abf3da9b9" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2c05b0d55293b80da02e0eec884b2fb8" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ddc0cadefc83357fa923ce591b15344d" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9166d0ff83e887cfdcad3e4dbd20adc4" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9cdd27462f5ec1307dad12b96a678c65" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "970508d3e1d6fb3700b86f74270edc6d" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9f9ddee9ffdeae86f652678780b2fb14" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "0c16638a86b5cce0563fd543fd44f47c" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9a8024585f7dd6a21ecb911091c60c01" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bc7645ee7e54a0cf8f5eba1c99035df9" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "26e3fdffacf943da8868eca1d99d0b04" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0a388417dc3d31cb9892d9ee481a0169" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1107aeb2331da698946419934be38ec4" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d35909007ffe156f50bc3ac938c06da7" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fac61d538dd0ac47c8ae825d213589a2" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 32555008, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28327936 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 28344320 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 32538624 + } + ], + "md5sum": "d02fc681f92d87f2f449a9459d426b2d" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0aa515b72fde38d728c108a3f3fee0e9" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7eaabf6ff2c93e7d83bcf23eac4f4284" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0ab92d8e5fd98fafd8037444c7117e48" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fb13d1037e30d24d3c42c6040e425033" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f4c2d863f1332cee7590b41e26d2f623" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "799784e145a94698a65e779d7ad4763c" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3d481a5d29e1f31540386c7be2f47822" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5c37a74ddc3bc1c4a6389f9e7eeeebad" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "340f068ed0ae6fc79d72f942ff6be43a" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b03663313c276b64cb4b44833eb2ea43" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "649214b2b1efa10bc5d33b556077f4a4" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "548598f805e710c44f50061980593cbe" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d71021180eeaf534141e000e6e24d227" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8d3bbd8ad970a8a9532e5f1ee39e6201" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "394aa33a0e5c00e94fda5c52744d82cd" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f5d6bf1e741170162f06eaa7c1f481f9" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d34b7039c931cd9530278dd9173d4885" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "8eade678ff8cf9e862805a00830cffca" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b6e1bb4626670f00e25aca959a43d515" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3078e0acdb174d9b72d07a754b7cedd3" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "12299e05d6d0438b5a8b4ee3c20f490a" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "057cada5276771eb26e02ff2239d3071" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "638d06db288cbc8f44795c493202521d" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3d3c7a316539b4942e96698d73ef2452" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "63e23b86edaaa13fbbd4ca89c8242947" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b594ee0c7ebb35ffb5cdc189825f2411" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "532fa1fa50c542b24b4dec0d9e205548" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c82bf631fa2caed28e82f89e44608978" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "eea816d59f64c17606c806bd63544e8d" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "1f77d6c53b464f9be1784f233604a482" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ff3e6276eae04f9fded6281b821cecb6" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "368fd807d2d65e26b03ebfb6f2d4bf01" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dac9fe265ffdd05f830cce8667520a39" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b3f77f6129508ddf6c31a76b5488f516" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c2d56f7283c25727fdf2e234e600128a" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "38398e58ce07f3cb02333d0c517f7c0a" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "dbdb778160314b3169b70c1a00a5d09e" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5212b62428d1e4dc4fb719bcb74bb7aa" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3d2e86bee4079da9a424805ca8d05d76" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c3d959188d15c798b97b091eef790a90" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3aad6a65191c22eaa909a4cbb703098e" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0261e13f15360f47b7b9f6808f7bf5b5" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4bbf3e3be8dc1ac2b80bcc7fc6f78b32" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f1e915c9b7680d931d5c275cd52d4234" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "134b0b0b85d80b7b5ffc87a3758bdd84" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6418c0b51deb1c74bf0fe7aa53420e58" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "63f466f1538331c03ea4e63260de3557" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "ca8b9602ec3f17e13c0ef2b16f917b48" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e779cd91ac501ff74c87c6f5ea9722b7" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "45f3877d644f7579dbb2e945161a619d" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "937256233e6ae1e8ec84fb4c4e2717fd" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "71035d90ed85fcca28c23fc2d17fdcc3" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0fb5d754f301c5b374fbf395c9d2d460" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "dd585ed76efc18097c688a7f17611e1c" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2fd65c6632dee248406d26136cfc139e" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8f7f23f41d6898f0ec13cb5cf5608a98" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6d6613471bfc75da2e40661878991008" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "77b685099cbdbeaf1bd74f26bc53a9a2" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0f539d46f3435e2a186daee73859ffd1" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ed28a71e601cee32ed3881b75d9d4b14" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "473754d8ee3e3128ad76f554c572b3e8" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "21bed5bac2d6ce4aadd9d9bd354f9554" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a3183c9cc07d1660e78f58558b35e209" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2f5b5aa52c5592dd94285b11aed5b651" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c14b0ef977e25279997ebbd3bbb732c2" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "45a623ecfb738e688d3f725b723378f5" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "74ba294c84719a0b11df4da8365ab690" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "86bdff984f6eaf5463f6f9199feef5ff" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "383db8b2f2271ae03bfbd6c9042fa5b9" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5560fee07d6e5c393f045fe41aa0e039" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "971b899f3ff58ddeb675a1471c56f473" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "b1f676c58b51b654507ff880ae8fcff7" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "763c0254c0f7dd23b49782f2f0bc3970" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "08f01c22b6b3d087f7fd336122d3b95c" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5cca9f5ed0a0d0055195c4f7056aafd7" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "00bf89b409e87d60abcdf8db7fdb946c" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b7c1a8a4936886a881a701ad3aee830a" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "cc560211d5d1bb056db109131f3ce471" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "3d944b93f773b6ca38f8db4cac1ace8f" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e606bcd5df5de61bda7370bac00decb0" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e1e59b476e47f364c5998477ed0982bf" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ad036e935584a51bb47bc4409863c95c" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f411c7424c19513f2f238feb298e6714" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "fa9964bc9ce928a4c5212154239f347d" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d87d3e1966fb1b291d50df038c19f0fc" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ceda6f3a74267783f0d733c3ea6772b4" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0415cb888cd6d21ef4a7ef4fa17f832b" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0a30359e4a7f49d0290e3a600b2b632b" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "202c0315665e5a297836f4ad462e03e6" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "7b5b91729b87e139ce7735c4048d18dd" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2697a0d374accab4063a26848c3a3208" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9b5ed9f52bf64bd87595575eb79e00a3" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "90bf9f7d7ac90e50e76b9f6ba2e7120f" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bfc538e3e56c6e23b205272188edcea0" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "81aa7badf9bd78bbb191ae480c68006a" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "6a95f715c8f761b748ec8adc0b5f7c71" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "75317c2870d7c00fbb7d40b165d43eb0" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "57cc30dfefd0b0ef79f88d73fc32658b" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "daa6367e81384931df9c022279f48a78" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d0ff965fef281a251a6e0e13c6827d6b" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ae4f3904a17c306d3dd5a1314ba0aeea" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "eeeb689348b9163c227cc9697f309bcb" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "01a3b7770cd6a9bc2be9e953d8df2e05" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4cce05aee650e57478c80ce99db1fbc2" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45582aed643bd124050afd6064062890" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6e33b8c088d192d06f887a6069cd08e6" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b1e14c4e1995077a1c685a2608acbfd3" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ea879696a00a32a0df00f4152f0b1f7e" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "29ea42ae5a1c626c381a0d7013f1d657" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e2f6d78879c7289cdfda7c8ceb30ffb8" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "0a49140babc9b5538864d6823b2c562c" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7c0cf0a3e5d1451a9bb2ea2ddb5a09a1" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bead09637f25b2e9e39334cd3e53241b" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "47bdb638b2e0715fbe22d962a5695e0d" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "780717f23c609215a02ad234bfd2d847" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f231420526b83c3228432af8ba2ca767" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "922c8c4cf4c5a1b35b49bd0ab0ac3302" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2d2e0d878b9f5f15d4337d4bc31c0e9c" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "05c85cebd5ed29b06a20213ac5f7d10d" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "6912cec461d7a4c6c06ad2596110954d" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bb401d72b9a04d0014df816c972175a9" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "23b1f1167ba64df4a1f2502ddb834474" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5710a1cf2ca326eeafb8f992325cb728" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ad86a73fced658dab2bf9809b93fefe2" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "62e13a2fb0c8879e0d9e85976d5c3786" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "be4cc211d239d11d2e3dd1a4b9cb1807" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5e0c2be26f7f8b35e28d86bb4ebd9956" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d04b0c31f98ebfe2a1c98b58050173d4" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "2116e9985ac7c0c137e3c9dcb6e29742" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b00943805e8284af73f118e8807d3930" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "61317733a5b1c3c677c27e6c40f41d73" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "d6409e32a814b16c30badc3c9d9570c2" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1f2782e53290ab57015a9874ea71bc2a" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6ac884cc2c3d73bc8c50ede1c3a730dd" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c1d89e9dcc303b4c10fb67a50389ed55" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a6c4d68b31882c0f58dfbabe641e48af" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3d9aac125af7080f82cf55c7b2dad4dd" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f39ff6d905736835cd97e2232f15e170" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e8e0bd7c8aad66f8e872b3552c6844f0" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d32d507c8cadddd18ee5b9d2400c1ce7" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4c7370e6c7afb0bd795454d3e40c2aff" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "801ce813892ed08f147a8e403a65f1c6" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f2ca0322e8e48ff926b2b5b79ec83038" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f0eb6336d4036e1331fb7355ffc927a2" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "c1c36bfc09b9335b33afb4efe47814e8" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "da0c1de5a8c6d9c922595664bdbfb08f" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6029f55c3d1ac07364df69d9ac206410" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4227dbea1fbfc16f9e578ee4564a96c3" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "188cbd95edf18cf891495cfa368c6ec6" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "f91394273496c329c3ce65ee42c873fa" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "da664c0ee8c62c2f625e13b6fd2f0c3c" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "42d69419fe30542093fb51a9d92f248f" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "51a6eeb08fe227d5ea4d0d1f213b23ae" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e204a40a2c50e3ffc5e3870f58fc8a0c" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b6926dbe163fa6a9faa047f91bd27e1c" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "e65ac21643191291acea207dc4a89755" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d92bab0cb3005af047e74ad9dfb3aece" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d5255e786b67ea19cf6c02ddd489b6c8" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d6714112ebf7c6e500094f8f23433a60" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9bb92f5eb4f34e72c2344bbc9d4230f7" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "fdc7e8716a74ac9cf8fefb830a061d41" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "4abbdacbb5034aedf7c6962b7d97f759" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "5af3806822d3841649f6929f27047e4c" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6cde6191d0f8f5e8687ee9061bce14de" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "27b3d9c2544794a4c477a1a76fa1e23e" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "02ab466c8e953856c51817e4a35d615c" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8a8084f3b920a71819e1ac705cc4de7b" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2658d6aa7215c236f600f31114a3dece" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dbb49a5b701900e23841a2c1c978ff0e" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "e8629e0ef321819505781c698cde8cde" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "424420ae14c52492b388ab3e480fe4b7" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b327c5d1fe7111562711ffb1c7ef45da" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "25a8e872ca0ec61ee506efef3c0b98ed" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f29bdb58f532cd2eca02adb070bf866a" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "29625c9d26218479e9de6cc250a9e781" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "71dd6680f74a36cfb02b7e9fea0741c7" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "936c87c4c5c8dd498f346111aaf3c0f8" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b35691cf5399341d19a21506ef692515" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d8859d1f3bf6b07367441d72f3f680da" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "98044ee4e8fcc7997ff1bb618085ed4c" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f9dc19d3d4d509aafea023ea362d0fb2" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "826bfc7d970cd373b3dfc4405baaf8ec" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d553fc62ae1630e13b84ff47b35f0e61" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "f341669aa46370fc0903ffb4e8995380" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "eb184af4d4f7ce9a0f61837d63486773" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "39e78cbb0c044ece168f0d4ade5f01a9" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0452f47619e70991b157a5edc4be72da" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1598a48489b317424201474e2794aa5c" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f99cbbb0c64ad49163a038ba0b130ff3" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "58b2d59f58b9a95f0bec32ca1426d234" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c40d6a6da0284546784e501eb2fa7c35" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "956677d8fd04148485e40ea5da76bdfa" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9e233b85a0785e431de028757df009ec" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6cdf110ae8727f4f9f5e5487320b8720" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ad5b932cbc17f0032368ed57deea5f91" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "931caec272a36219ee9481a63a1e1dce" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c1ad9b8d73752926ce2df30de5184ef2" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c7ab2c58da6d9a533e04230353361474" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8bc8941162b033a1b93dc95fcc18fc67" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1b0acfbf90d2244d62d03fd9a21bbc0b" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "4a9569ec94155633f76c2c948e8f4a0e" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "7c7270d384f258c9d1769153f3ca9aef" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e4e51474a05eddc772853dfe66b4ebf8" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6b400e6300fb0636a2b01cf50daffd52" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dbf521930e419c9b2b717a0130c52129" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8c02c43bb1f06fdbc6b7ea0bb2054869" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "12adfc26d902541e5fd0416ad8eae341" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4a5a3bda7ab786b5e39726d8d6b61c95" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c73eb5ac3c59c3a342dff06a2f38dee2" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "eb8e565b23e0f3a952bd22e6ba426c79" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "88aece775c89d67c7fd5fe0b879cc3cc" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "af5f9e6c4ef43d7d33ddbee656c7aef9" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "50c7a1194cbe46a04178b36a55074481" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "30343ab7c27173fa2f2d9021eec2afc5" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "19c9fa84645d2f2f4a0a23f45497d459" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "38d19145f56bb49fc50d51b77b014679" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "fa80c018954b0e2526c6a5582a488b29" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "286e7a8d12275991e9b61a9ff993486f" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bcc6eb5450ff6172080e3c90aec27a44" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "69477fe975bf0e7250e282797671a04b" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "df0d2a95b4304cbc987b10e0405b1599" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2bd11cf0641e7d843572fbc617476e9f" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a847fd410de5f45a14a43872c3844d52" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b186116903350b409e59eddd7c0cb505" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b2d52161c263cf542c2099537330d42c" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0c6ccf4d642cde151bb7258bb815d12c" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5751f4a1598b92bf6c2b7ffe670dafa9" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5ac19ffb48cdc5329a6d53c3707cfaaa" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "1dd99d00de6dca39d7d1f4ae1e29b9ad" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "26c4c25a588c8f1239ee747bc06f6f09" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e0df4fa7e8782b76c10d30a88f283dc4" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "e6d52f943e82e556d379e9ffb50355cb" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cb30b2f42f32e75b90a14a2fc11e8706" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ed671d035f4ea03509d0f74514c6036e" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "204090bfb680e25182804203844ddb71" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "15b2c9736a71c58eda039f89b60c0b0f" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "96651f5b91b4b8bb7f6e55e39dcf7370" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "7188bfea54295c0948285de9313a277e" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "30fdf83dfb2a3d76ff57e6fcbd92ed94" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "423164f0d017890e0d7aa6e2c348c696" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c2f098b55b4212d67e9a58c711afbecd" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a5e9d6099bb4262b29ebb78f636c7467" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7e5e4f5248602371f8109f07f9aca143" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "43969974ab3667628dbefd68e3e37c9c" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "b4eb05148376700901728b0b1b11b30b" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9e529826d4e9af76f4704a6b9536f87c" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9c093f88814140f54a12d2279b8a33dd" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f533545d4fc0a372f1dbc12ca8fa7410" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "83f6a1f665222fbd3c4ed1461a9e5152" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "5b90286fb2cb2c54bba549e4622925a0" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fa05a2929a5479a2127493120b1dc7bf" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "285b5085d2ed6836165cc905db4bb945" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d5af742b823c97ad7ae94991d1666a1e" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "00b15e358142b7a6680d4fe037c7d106" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5e69da77863ece3bca7a4af37140ee29" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "6ba9e7fa5af7484a4c92220e242dfc17" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3259f37cdd8c7ea09e2cd5c5f3c9fbf8" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c3f26e41031e1156eafbc253cd7691c1" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0daf4e5faf1b40885697146ffdf5d58d" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6847ee09c2bc393ac1f88702ab31da02" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dfbdb2c854f5bd89b16d59ba0ccc7e74" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ff850c88e77621e49b95461c0fb1e6dc" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "9741964ca67d44fe8e451d7f4e49c69f" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d84c8fde42f72ecf8d9e08695d041a60" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "68e629cb88c2d38c1ee0bf15822ff536" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d4d9085b7d9e056703fa3c9edaaad156" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "dd0062ba602c98b6250b8277e6e6641b" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "f0f3225d1b9acc13b84f34073eae36b2" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "677245a8eb0614b08250cc78162837b1" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "827f672f4347bf4cd422d4ebde9d5265" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c30f9de08d19774b1a8feaacc9c73e0a" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "436848b75f959b2f4e7d9b874e917d09" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "645e57cade4bf10516b0e3d2396bea0a" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "687f418b126b163309c27a4bd8179803" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a0fca0abde152bf6c4934473ff58be78" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6ed91d628035e988c2c6153b8ef920aa" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3d503990c162721dbdf30ee768b64a00" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b561279bcd1033236a879cd2be0a20bb" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "517861829d81071a3dddccf28d499b17" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0d5b278e517399f29d573eac41a4a60b" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fef82f91556124c39121fe422557fa71" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ab3fd26a407cd0fc617a0cf3bb66656a" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9ff1340349e50daee9ba9896bd009d50" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2987a777752272919bf83ebd486a6aba" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e7bf72109178f151633a538cfb4e26f1" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "e3d74f787d8c8f81c4ad5b0f3c418557" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b4a80d107ee6e3ce80e0aa695a55578a" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e68439d320f2efeb9eb564ac01e751d9" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b54f084bd31f7784b53ef1b157ea5e54" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "416db5131bfb72634845d6f6de9b0025" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "68fa754fd79545d631bce140a96137c1" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e113d21119deb6cd46221837b7a815e6" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4b877c033f5cec032303d866b86ea2ab" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "27a1baef13a294be0c2f8a3a1edfdbb2" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "e24fc64e2d8150f881588bf6a3add20d" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "05abfbe37ee397bc767cd75d2fef7227" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "511218149a09fed144ef2a8c18890554" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "9f17dfe2fa1312b6d12767f8fed6beaf" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "36ab394dcbb1c3955233e7eacf5cc338" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e42d400fdd9b7cf5806e177c45462c0a" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2402fd9f7725e917ab9a59b66d30493c" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f8601560c77b7cb13b267e81e306e25d" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a031a421193087365e357f5683cd1150" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f1a0b988084dee2d29caae976c7e91c6" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "038c1b0dd4b880047f10bb94fbcabaae" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f748a668abd9a9b5939002430681febe" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "788636aaf3eb970e75ca4ffa4fa159a2" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2703335aa830cc30ba1aa60ecd67355e" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f55eb7c694ba6994a4d4a2049a4ab816" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ea06137d978c9f1c0d75ab9e68ae7e78" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "20abe0c0ac77388147252415d0330c10" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "78b3e1e62f63cfa8d6625969906713bf" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "420471c48bb417de871d958b9e9aad5a" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "588c81967d060eb68ee4f5660356c03b" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f41fa13492bf67a3399055c881866d65" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "f06dcdb5d3870434502974927aacef44" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0b59b0391db6ad4a162ae70367c44dbd" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "dae251807675e0352e14791c37c03300" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c42c68370a895ed4759cc854d112fa13" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7c0dfadde72b48531a9a727bd3ec33da" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7ebb4c637284a8595e89dc32cf11a204" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "d93c7003b83513defdc4a5e69d49c929" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4100a54be3d7df164025ab9556c4351e" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9c0a18dfdc0d3f8ea457a12c49dc495e" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1d041b74c1a96f6ddc57f1753b1c4940" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b4a309335066a16aa0886f2fed50811a" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a17207054984aef047cbc35a6053cd25" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d0e579f0538f6f20b8b9b474d5a975ff" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "082c34e89a8fbe60806cd0f9aba57427" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a2d88e9362e10c4d060651ba25d200e3" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2dc948984414c732986f904fc8f38227" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b8c6932db946314cca0865f1894f4f0b" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "73539c2c0d866076629da256970ff050" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "1110738c469827deb3860e5c02a5e186" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "617be627c4ae3ffe8c1b9e047d477b62" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c4a21d071444512503e9e87cc12e7ac5" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9142c29ec102f50c3d573d354506cdda" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4152ade432f3c74364607f3ac13b56c9" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "152e79bbd180340b575254ebe69ed8c0" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "142166f5cea64090675913382dc92269" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4318aea05318060850be07d4ba273b69" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "262985675967b8ed68eab62b0bb6d8b5" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "66e72851616a23bea2f2a965f2720bb3" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f8051bd7f7c19121b7e0f2374f477a6b" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f58f4c9ab86ca0a6466341da25a5c807" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "c455669f490ba82974e28f846e9a71a1" + } + ] +} \ No newline at end of file