diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,13367 @@ +{ + "metadata": { + "ParamSize": 885, + "ParamBytes": 40900313088.0, + "BitsPerParam": 4.157646319274502 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "21ac9b8edbd5058ae8c215ded0fb901f" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "fda03d61174c0c87ba4800462cc88669" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "69c432825c5b3ead508f6174a5934033" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d75f298f88baeaf7e3b7d8837f108763" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "57e1c7267c56b11a5fd6c10d87527ce0" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "6224dda625457cd798a5c76a6d379275" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "0494adecb960904576d95f695fe797cc" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a31ba55c4c6e2049ab94827ba17fa73a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c0652b1efd88c82319f8b8fc5a9450a5" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5579db9e5b61b1df42d8f356679d8df4" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "66ef391145aa88d9b62296e2ea6650dc" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b1e93d9a728144c963453b1b154135af" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24645632, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15171584 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15187968 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15208448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20451328 + } + ], + "md5sum": "aa06b077f582b474e63419f000dc0226" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8592a5b0cd0edcba4d8046f0cd56f51c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "39e13be1169d294ddb0ddc7cbb31d2b1" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fcea482e0d18f8d88339cbdd4479de8e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "73898e57c4878d170306b47d0961018a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30330880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15155200 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30294016 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30310400 + } + ], + "md5sum": "c2a5cfbbc27f36dbec228677b232f23f" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "357dc8bfb333e66b9b56a140fc71cef2" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1bf5bc65613252279963e379670dd911" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "04f8100cfb8639edbe03272b758ce594" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "85ce89d5c3d3f1040d66b07a3cbe439f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aa8daf770b2eeaa423b33abbb0f37470" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0b0b8d4d10cae41b539375373ebd84a7" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "6a94df0f7847655461e5e3f0ff4f57e1" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2bdd84c47b771ee6fa84ee5fa1dd50d6" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b79759cf861379b0d4e530b18a1ce769" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d4d0539e5fb246839ffedcd27b210ede" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f7125c5529792ec21445b313fb272fbd" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "47acc506695b395fb2a95f4376c108c4" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2c77e814a7678c4cf65ef96464a6e7b7" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 28856320, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4210688 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4227072 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19382272 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19402752 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24645632 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28839936 + } + ], + "md5sum": "12b145a6397dea1a9267ab6432d5d9f7" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "42788ea52f93cb18475e6a0e367ce53a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2b20dd4e10c3345a567a217f29d65aff" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1807a83cec53874304bc4efc93faf13a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "98fab03a6f351466835cdd603bd02154" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7b95b7d5bacce89fcb5271fbf35aa46c" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "17701a14fea50cc8f7ec3045fb0f2be4" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "344f0067f6b2a2fd27f7bb5bcec5ea90" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1c23112f19c1f94bea9d5e92eb60b153" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2fa121903425f57b43deea19b913146b" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b3f6d9dcc54a63d606292ba2d70b1389" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 30314496, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15138816 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30277632 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30294016 + } + ], + "md5sum": "47cc873c84c149c66e941aaaa3d2b911" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e63c15c9731cd171e1595574a96be8ba" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "482dfe218ef0d23edcfa4761ca116093" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c8693f0acf6a90d68bbe152f05d0c635" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "30ae731d367bf19b5c03b02a4c8494ee" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "824680e546981983407e34d44ee7d4b1" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6287b9025a90adb3af65c84ead91d02d" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "6fdda35ba6d6f94f630207601cf71f31" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "110d471d19d5b60e8d76156488661a90" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "51dd67179dd7ca4f2286c055effa7df2" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9d7b5905199e846cc1651e9943ba9372" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c7d0ed26f6ca6e6e87bb38b61896cd7d" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5e7dcc60fb4845dacadaf0d186074ce9" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ba2886ab4e64aa9132b19747aba72ca9" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 28839936, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4210688 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4227072 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4247552 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9490432 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 13684736 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28823552 + } + ], + "md5sum": "08e9c3ec93876db4b706713e6a741dfe" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2f1ac1bd5b21a866dc641c9ef22a5fa1" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8b718ee23c1d0620957fcb92c23f78d9" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "75e6c90a47d61da9e968c9ab39b683bd" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "114af8cba88cf0785694bda8a8860baa" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f8b1743842ef476fd139534d7f3f2174" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5cb7f7b2efa7844529c2881556f3a81d" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b3a741c2d079e71cb790508efccecb45" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a0f425eefe184576b84f18a13ad96e8f" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "104e4ceec3ec32aab6e2d334b62a7e10" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "82f24f26bfde5f8e7408dff5eb53cedc" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a885ad5f36800089329146513944a081" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "db8493f5942a607078e554b4bce06cc8" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8cb5480c94f0ad8a01e7fb3918431ca9" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "cf71db491b5844e0b6c24467c86c75d3" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "52b8c49834ff5f823d1c1c8d881ca10a" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "543348dbb33756a7935077630711660f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "15571b34ae08c2c69317e7c2a2918d9b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b6a811fd7651fcb19076c341ce2d1cd6" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "98642eb605d8f79d575c4cd1dae3ceef" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c083a70a1c70fb4a2c1c91eb5eb6e4c5" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "56885d48ae9684ab6ee3c954d53e0ca0" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "babddf1946f7aeaf684772ca42816892" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "24acff1d7170aed22d2a7e2cb386cf4f" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "ab4832d942ef7ea11300f29cc607bcac" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "45429548d8ae7149ef0a07e75ef68927" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "005bdf8dc325d69ec7700ea7e811697d" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "260eb296f97db3ad5e3c34af3a282051" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f68c8143f5d54c51151515676852dd2d" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e1f9244233c1e060a60d31945467ba91" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "a67ea6997131d1f49da031df45038065" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "085268a9b2f8118788fb0f206cfdee7d" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1287fb6b98ff7277a0660f839bff4dd8" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "408b1353db0ee0a7789f674284450db8" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7ef7caafb2e6cbfddcabda74e0df5b8c" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c07e7dc557645566673bb70991b28f44" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "dfe839351638cf66167aa68d180305d8" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "cd5e29adcc016c99d390d5e475e10c4f" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f5fe43494f1f17506e12d78359070bdf" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f34c94b0d607c6e1f2a80ecbe0d75016" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4c78a0011de78aabe5cfecfe567542d3" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e2353ab8c2d8e1fd6bcc46d0dcd679d4" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c30046da7fb4297f6d13def9df3f6bf5" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2aba30079f1a4263b994fd6f96d5a064" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b5f73c998f5187e7c82e6aca0e83f1f9" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8f98dad8d2906233fb5f478d3ef2b344" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dbb0d5d904f5c1095193c5f78fe9d7fe" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "36d0745407021b4b8a61aff981762d8a" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fff63cf738cb32097619c810eaba2055" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "86a87549f17b80132c9280602bf45b30" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4c3f12b018bd40cd8a3267984a2ff36a" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e97a1d001aba0c55024b119c8c1e92d9" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1ad8c651ee120e8dac05c7957ae815dd" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6101f640e8eed91a1102ea55a2501887" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "ee240d2ba1b70411e1042e0c63b596bb" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "aeb874c3080ff7898ee450155e76791f" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9f958989d0ed34ff22aebf722ee1a8da" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "94e6c5e6c6746befe4233e73e404cbd6" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "720c55bf76c0336ff74dcd9e2708a16f" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "370d407b5c9bc069bf4b72fc0ca6d799" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ed8eafaf8e5a330d3423efeb0dc530a9" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6d995a2754b38321a2dfddea3cff5e04" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d589f322d619c17d2220cef043585ce3" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "36329e9afd61e7d0e479f85de1bc2fe8" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c8aabae3d8e70a98289d8a9a232b4987" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "14d0d6e75e8cc4cf691a62b7e8ee7e7a" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "1d5938c99da642be2216e6edd0fc9261" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c9f56b0d75481ccb51604fabf22075c6" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e5e3f9b9f7de30a7f2651b6cfb42d97b" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "24a7c1cbdc638498fc3b16a2d86834cd" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "089d85c15aa811561bfd88f246176aba" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "01d3dd5b3d0199575042b066d874c622" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "046a3c8e2c4a96a68a0aac4b96ff885a" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "30a3373c4e8ce9dfdc6bcc3241cc1aee" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3819cd216eb9e570690b6de981fe8277" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "388497405a4de3614dbf81fe6527d83d" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6117c625b1fb80560ce5745fc9bbd1a5" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0cd6971fe47caa05063036fc25f2e6f9" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "828858abe2eb8d5a8574ce72ed279ab3" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8dc58fb35416ab1690ca1aa8a31fe3ef" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "935688991d48016a1ecaa8e22b214787" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "edc1e093a61303afd48bf828d209ffdf" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fad64cc45498ae68b6d6ddd6f973f42c" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d876798e8c4e41dd7e2ca5b37559e0d2" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "713b10997a37d71defe3ba2f2c0a2ab0" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a688c161ef573296006c9a35e3e98705" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6ecf882327953e14b4d0b5c7f1ed93ac" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7bdff6a2d0d28eb511c27ae2c743e4e7" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3d5238ac969af6fc78c37ee577b58bac" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c48b2003b278999db2672f830596c145" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "b3b628320d864749519f6f8ee732f84a" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b48d58655f1afac6cb71a2051d6183e1" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1ca341a09d35bfceb6ae83aa22fe0d43" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cf1b03ba51524611dd978a1e31027024" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d591e77bff2d7897462b225f9b0faca8" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8e9f5b8ee23956bb2d3a5b6e7ccc947b" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "e86f03a5feec8b41db7bd5df64167ac3" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c3881a6b7025f68941cb4211224413ce" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fc7f7603219e41f9f0446865f20f0ad2" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4626ad4304f598fa82f5c785150efdfa" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "adf6cfc63a20d45ed205b6a2ffab7592" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "23b9d541c2161e00b972aee7c47687c9" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4c668e917780fc23294a982ace43a996" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "fa98618e1aa0d739b382aa7c3c940c9d" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9a61efaea798dc09c47536e95a432a31" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bcf6ff5ee614c9ccd207d0531bdb38e1" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "3ad3f09ee13b02f2b397bf44f9f283a6" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "413218f243f4f2091cec0c8ca356d4a9" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fc3418c73332e19d105123c4b91b0a2f" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d4119e52cbf4de838f977662c447b53e" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 28823552, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19349504 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19369984 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24612864 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28807168 + } + ], + "md5sum": "de0f7b6c71eb51a35eb8eee2a2b71636" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c93d4570922735d12e516c14faeb7968" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4cf0e10e7aa002d1b91842d6aea865d6" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6b07422027bce55a7332d0f50c6a2e7e" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aa35cdfd2e9b4465210cefde5740b04c" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5ecacb05aaa6366cd0f08674f53be383" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a4cadaa3bb374f6b7a883337f29cce5e" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "b14233cf5600008723e787e64976b3ed" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d205ebef02c13bda1d5dd01b30d444cf" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "256ad32ab3926d293833892f0874027c" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d67bca13e066b0bd708012c6cbfd53f9" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "231983b676c66778d85410db8b180e2c" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "7502919612114763aea816632bc7168f" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3799b3cf628acd1b943a972e3086d308" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7b5f894c5ad018634cd3f0dd59ac1c7d" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8fed68d093d7e393ed7fef971b574967" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "721dfbcd9910e91dec97e4e116a5947d" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b655ca9bff6e68890693952471bdda36" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8a00f1e2e2355664a4ad470e819c2c73" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "113c5b54a7f22cfd67e6549f3b295311" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ee4e7b9e51fe7f33830f9dcf100f6f77" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "33e431e82231b272ab0aeee56c2823e8" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "748269cda5a6871c8b89eb1409991184" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2feb60e292ef7d532a5ba09f59665a38" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "7d7a71a79d822cc58bff8ece486ae07b" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "532b34ebc2f046b8d9fbccc2876bd549" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "38420968b5404dd1466166d8ba05f9c7" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ac0a91e49c9910997c6521948f8b64f8" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2d8e9be9c67e01e0102f2a4e4519bdbd" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "0819698af02de12686fa33dfb36c5f68" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2a342159b84f2e7fb4ea320735362c2e" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "359b4bbe2ed12bebb15b47f0f5d97781" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9a642e0a143f51d995a4915973fd8cfb" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "e8e66a4dca9caff508a80642f8495d2f" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a06ae034c52dd2a19649aa830ada6c5b" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a2381ea990af86b9a8cf971533e9b8a0" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "8edb1b9e177e532f306655022f5a7cfa" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bdd8820e2c10e16b2cf59e49412b2722" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e33720492c5e60ef6e5bebe5f9fc0756" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f7845d660aab03440fa53648b82639b6" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d51fb42b4de14a0a7a095c5241cadb52" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "22f85c6ef7f47b8686441c1cce97f5c6" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "bd5f1168e72398616f98132adcea6946" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "49c09ac3ef760df7125bd06bd92f62dd" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "81d76aca3e9019b5e4698a35fac77c6f" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ee1f59a8d3328f5df5840da36ca5fdf9" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "55eb8592bd334a377bf913532fb8c66b" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "56561d1f8c7d6db3a3372f8264a9b1b2" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "b313881efe6ad53035b7deffad4d5626" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6741a4daefbe39f3e0e00cf1693910a1" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "30a0a99dec8244e06fc69dbf9a58f944" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "52793cfffd970a646531601c7e4225d3" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "793391d4eb1e7c8f0a10de84d87cc6e3" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "369f8b743e5acd4fb8664fa40c576143" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "02d96344dee39f487fb20ecb5026e4c1" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2d1301be48af14e033fbccea01423960" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6b45ff680f463b8a3e9cff984eaea2c6" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "48b38f767c0324af86dbc829f471a566" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d987e8525edadd83c8edd2e6e5afaa01" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1229c927b5f7cb2a4b11b7ae02602112" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9d25f1b262d5eea4aac2d92e25178466" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "c91b8fce1e738cedaf2310b72503059e" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6ebb659355211621d2cec545a767c6f4" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ff84094952cd00afe9c70aa60b34ca75" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1663bc1f065861b95cf6e549e5997a71" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7f8d3e3b516e8f216b3b0dfe90964d0f" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "05fa124be45061811e50029cc1470469" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "53751ef2d481737064486aa15f144dde" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0b7529c522cf34d4032b04e81679bb03" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "74336d19222180c0f9a7e5341e3d3767" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a2a6d50f003b4b621d994d934b48a416" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8527800b3c8f4675bcaa2d05c7b19942" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7c924ddb0ea3336b1ea5eb4c7331849a" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2ebb2b6a1651854856a3befd4ba5fea2" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1b925e36f3d66bd27ff5c2569db69dd2" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "8e5aa8f384270724d33ef24a26b4adaa" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c44c4b9892e8f718631429b4843c5864" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "575473765f77b7e93d3c6f51c1c84b9c" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "67ddf30f2a9e649f08ab732ddb987083" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bfd48706d260e718ff0bf6e045fe8d7f" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "396f0e434cbe3fb5c6e6269b390da234" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "04deb03b060afc63001d5cac3ba02799" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "35d3ac4f819e8650e327b9c12ef214f3" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2c1eafd5208c84694940145d0a074230" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dac901be55bcad0962d084aa27ee8d5b" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1564179d019a7e50f186e9655a8e30b7" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b5aef43c60eac5cb872408c18d2949ec" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "37b77ad7d5af5aacbb19b8d53c976a2d" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ec29876fbaae650bc3b032af7bf4b96d" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c6537451ad22b0e6e7981e7c5ed1d7ee" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "10d9f4898f1a371662b2df586c08a342" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f5ad43ba7e20a0c4d4ead4ffad059a68" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e2cdf36aca54a2aed063a08fbedf5f42" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c3e1d42ad6e95fde49772ba4c7441142" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "43482d12689a74eba0f58f3edba09782" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0715a04ea8ecd0d1a3460d63e76ec6e3" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "1f39867991660b4ab03d903e38cf96e1" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "108ccc8bb77db27cca32bf582c074bca" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fcbab7640e5ad43a83d4c4530e8b3fdd" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0bb57de6202fa557414aaf5fc2ca6a88" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "149101860cfcee3ad879acfe87eb135e" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9cf4690802fea9025d886792f735e741" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c4ab1d3fafd20f4bf28a927421e3c0e7" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2cb84a808e2006a0017095dc9c7fc8da" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "403f4045f06f98433bd1169aba17db3f" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "46b62a5920a3c17585b8b07e550cc56b" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "54ddfdfc100f7817f6f32559cfd21e9c" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "25c4e32cbb1d5d8fc8fd7e42ed725919" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "cc86f9e4680edc0f6daf9701ec4b3db4" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "48c15c61a9f4d81121dd8ee6609034fe" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1aef526b42f859b8b08ed824d9bc43be" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ad416a7ffe6057494d070a58cfdd5218" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bd3fbf716c651fbbc8bc36ecf8a281ba" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0bbfbe64b769fb55db9f83cd7be9f3bf" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4cbae29a01bf44aaf4281d31526d8c50" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "ba2e59a51673e17a7d1030717cfe37b1" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b342039e9823f6d671e5065c661a2cd0" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2a71a35b33a6a86f77a1adf83d104215" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "152d7c33b105ed5f31e8d46c05ce811b" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c76cffde23f1bc66161f3ec722817c45" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "106db50244313b94d3c3d117f70e070b" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "374d4fc85b726c1af6ad52ce337f05b6" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "982f7f02d4cc3f45672b33d3ac511b78" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "446b01d3625ff07747f4d07cddb4a25c" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "10118bb2082e188a76e88946ef7ad0ab" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4f712c562b5823f48d0cca085108aa0b" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4f2323124641901698e311541e85c65f" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "101fbc6df60d8ca156158abf6edd5321" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7667f06d7e44c75181592e72c2028439" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "1d5c7d5536a24e3360495a1fa44c32d9" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e7a8ed2d03219cff5e0bb4a7c9ffcf17" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "fbddf37028b3d2d9393e66f1bcca8be6" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "435a7516e53baaefeaac0a7e0ef1a815" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b4210f02935983662bd3ab7d425dd787" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "17b6b8445bf7f9b65613595e80d0900d" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "60229b50eef6150e812ea5b12ce554de" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "635a21747ca3138e34121dfa03928bc3" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9669ee358302a9e15436716fa20e38fc" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dbf7cb110a494332d66d3ed7c3cbb530" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a4f32693db276653376e43f8eb9b27d3" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e8fd2117294128fed44b80ccace02ada" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "255afa1baa6746ade16c1c2a872eddfe" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6a4e144f4c8838ee8fe0c32df999ac86" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "009cd8c7c864069e847a9853e772b822" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "84b32bedc217675a27fd6961891a4de6" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0fcde2ae0782b4d46b91c20af64c8611" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "825dab8692ef21815f77c40c7349c904" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "66cf58c51f2cf13144fa9eb65dc26be6" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dac8bc6594fbb555481b65e19cf5f662" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "958f7decf9abbaa42affaddc0654e49a" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d31a22e74f96ce6b44ec6e9ae7f16310" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2deaa0d5d0715c68ee3eac3a3d9c2559" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c158710dd02843fd129f0bc33ff484e2" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "72b771d593c59fc4eb27624d60806e4b" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "418382811e9934e2f4a7d6128930ec74" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "48d63999e9ce54a574595d3554fd8aa9" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7d55b98bbc80937a4207395536b49f12" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3f637d9b581cc19a8a0075099d196f56" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "9d2740e9effc7d3c77d95fb2cd5afb12" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "15f01fdc89648f34d6f4f1be0976ccf0" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0c998b0fbe0de6f6a2a393a416d1d5ea" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9146c1e23b393e546853db9c8d89d131" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "44133073b11ca7ffc166e5c400015410" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "13f009c1a3039e7480841c456b41ed99" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "55b388bd3f73210cb22244934c5c621d" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4ee411d4970238e9128224bcd276e721" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8d81ee20e8bc191dd03b9b456f4da1b9" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4bd86d6c0ae92b007978475f660a6ac0" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d8025e0edced69ef47f6d5bcf897dd24" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "10111b98734e054f9003482854561629" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "322bbeee707b0332622c3f141bc8e1a5" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a5bb445b2c35e8862f40eb2b3f1eade7" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "82c3d17a7d792bba6b3eef886eb3119d" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "07786ca40eb80c0a6160b3977c87e903" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "89b1f971adf86968c64f71be480afc0c" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a66edde6ff69aa8ba89d9f966d729270" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "68afc345acf44a439f5d4cf7311f0cad" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "be4b1d9b3fb489df9919c385a9c7b6f4" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "52ceffcb0327787b437759e5dbc14903" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "354868b7ddf69b4b53d4a03062b0b9e1" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c2867d7f82dfe37a3290fc4a28dc3210" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "09f04ff1ad28c9f797eacc385043ef71" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e1aa3c15657d618ac2943da541370117" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "e9b387dbfd784630cf72415192b1fca4" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "de54e75995a3b9599d336d677092affd" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9fc5ca44dc831dfd7766ecb2f924eea7" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "778d52ea01c2e04df86f6a04909fcd16" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2f929c3a60d3fa22f00297d87f110b98" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "2d5bcacc16e684767b982be85ef72d11" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "017cfda2907a1e787910d76ac26f0165" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "cd5a47678fef9a07fdd09367f28b96db" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "80405c2b0df1beab004ab40fc8fbf0b3" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "126cfc94967340e47885ddac5afd370b" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5a0ff12c562ee784021cb07d69a27cfe" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "423e8f24bb3ee25e37b5df947dc31075" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "168b9dc86d931744dd4f61b7900b5ccb" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a2d77813da99da70a1d6c6147e0e3042" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "5b09544da908af7db8361438d5d06eed" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "07c987d781799ae11a1c849015e38afb" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "491a38df08d5ccff6ed03331e7f21fa8" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "d9d629a3e19794b01a30cef07eacf1e3" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "32b5a97d9cbbf3ae0a522fd2536484b6" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e5119214293f525d0ab849dd705cc867" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "155303af71d8500cc9541f857c5ac160" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cde8b7b43dee53b464c3d56ece43b09e" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8ec805d4b1029974cbf377a12dacb696" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ce69c8250c67ae9e8d04aec63b22b9bb" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a0c2f4249515986545a0c56a5e8e36b5" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f5f795fcb463c92466482ab7e3d3b8c2" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "19d523d64d0dcbad740ff1c5b57d623e" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4559343d88f9375e32fc40804521f511" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9817e24bced154ee2543c7c548551cef" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "fbe0e3debc2bdca079ed2e9e7d8427d9" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "79228ddb63f7f0e342a8098f6cb75e84" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9603fd4a48852388e66d1126a485e254" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5fd7fad26b942874711d8378de330954" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8c6353f1175bae9adda8227c7e44a20b" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1c7113fd4c8c813eeb5da9db5d4bfdfd" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.64.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "460a15aaa208602ed3f9aaa2be29e329" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "7dccebd651392a488201566a1a698e26" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9487a6cc1141dd256692faf6ffdea00e" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "13915480d34a591d3db56551d0fea2f5" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4b0898fb915b1db98cc5f4bf3b9bf5e4" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4ee2ec19fc6c04c39fcaacf1636b9609" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.65.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "0b8abbbcd4d81bec9dfb4f8418dd30fc" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ee48656b7d97e035de7c467cf0d3f14f" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f90e4dd1cda4f1e09fb95ccf41f932f0" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "48fe02fcbdf77f91321acac223345fd0" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4fd68e3af0f3ee733b816de2f5c3b405" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "df53b6bda5872e496fdb97a443c67d49" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.66.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "8992c33164682c77529846885f12602f" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bb74d7e2607545b7ad3b358010f2208c" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5701f116500b72991583b0405f333518" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aa825b4808e949478c39114e7e4e4ce7" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cf08db439aee1fde740ed35b065fde8c" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e8fcceb6ed60262e4a70e13a1b614559" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "edafaf4a7b2d19914ebead871536b0ba" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.67.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.68.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "da29e5eef9fd3aba3e4ef49cf31f11cf" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "72b65dcfe5dd6c2477c1df220eac59f6" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9cf6991fdf9278c0411357896761f347" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2d7bf7ec4488a8e2f9c50ee34e08d4c3" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "bbbb11796eba09fbbc098de7a9d93231" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "501089feb6a1bb484d7f10bb22a275db" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b5e4273e2148b287b6c860542d18bd75" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "313a6036b277d7b3e1d0e5d3d5278b2c" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8d03e26b27060fed86ecbe2affe083ff" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d9917b4ea4f8d0bc6826034233640999" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "76198647bf5d68bf4a3e4453b1c1ad35" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "aa691a477310e413747459a2c9534af0" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e81d95deab09d3bedd47a8fe18d99f41" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "985f23785a8c7de1ca20758870f1e67a" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.69.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.70.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "1b6d52627ef21db2aff7b03591e2e38c" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d327c1f4b8719e5920ccf097fa94b80d" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5992866f7523c91347b3a6e5a6cd161d" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "378a5343afcac1fc0be56990edcf0b74" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3b2641361563add19e7682c39e4a24f4" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "065d4a9967198a5d42d3154f79c8af05" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "465514347cddbcdd81095e9a8c87812c" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b92b7e4af283e488312528d0a221a5a2" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d383fbd3392c144bbedaa569bad0e655" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1fe48bd720d26fe661710b94e30086ec" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "81daaf9471453691e7987a3c20de34c0" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c5f34895b17e3da5cdf0cf2bd0a9ad2a" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "e4d5c5efb675bc8ace0af6de2e2e0575" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f8c0e4600884f642107bba59501fc5d2" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7232452b612ea9209b67cd2987862aac" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "1cbb93bc4e641a0e4434da71d532923f" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "62582375fcf519a6f5102131fb5968f1" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5f9d382bd143d255bc5e2a132dd73724" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c6e7928b32afd042982c73beb7079eef" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9fdb4778b24f53882b5c40036b1377fc" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "acbd5d95e3ce67ca1533e134f4f1f330" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fe6d25915b139410574874f1e5ec4d4d" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "119f86a9adf8e2f645b22da22c8f8ca8" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ea78ee3cf516016c2b3e1e83cc1755eb" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.71.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.72.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "35c2dbf4b62a9bec1ac8e3309c442324" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b30c2c1d9074e4c5ab6769e702c88579" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4063649087f2175e9abc520aa36e46ed" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "b1714ea411e8c3bc8106a3f2e3ccfcca" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b8166add00d28d0527d03a62d7eac8cf" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ecfff5c434542103871c243d929b7b0c" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e40ea9a1fc8dc37acca62438d91e677c" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d961de78096d5eb37b380585cc2dcfa5" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "df98169e0640786bf1345d6b7dd94197" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.73.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "8a0e02d599edd58336e01737d0325deb" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f1169915ab8fce6fef86854bc02a3d5c" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "833f3faf249610fdf04d65a2066ef81d" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "64aee489469f1ca8fa1f926d34c56322" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4660bc455c130a51162effdb592afca2" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "446e9a7bb74f00b7a1f86dcef8ef5e48" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.74.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "0a3610681aa2da4612e6a9d3d7f807ca" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b470bbdfd96577381bfd5e297df23a09" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b0395e85cf2c9cd20aeb11a8582f9041" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4fefcb0af0b138b36aae0edf7c327393" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a8ae69f927a5b77fa88a5cf6eba93889" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "19d47196a0e629200cdabd09f4f6bbd1" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.75.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "b4d5653cdc6647048589ab194707effe" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "98c961a045cc0d051168ebe12f3a2fc6" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f4feb44df88e8d1669a45c4bf26d7c63" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b102dd6a7e6a7f647794be7aeaa22176" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b18c879d4d96e1ab01f64ca7acc3b826" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3c07e882c85fe666a5330717f7bd12fb" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "384a750d5e6729ab952804ad073f7d84" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.76.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.77.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "231f367df3445e87c8ef9510d4b563cb" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0888ff8f0d690da224cbdb6455fd185b" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9a5f165ea12696c3a2435584259c9ec2" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c71fea08e44d5c0f63dac2e9a60381ba" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a02cddb6fac8e3ab29cf5830e252f671" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 19349504, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4194304 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19333120 + } + ], + "md5sum": "0d4fa5737cad50b29e948aa99f3b22a5" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "f67d1acec63cfa8ccf7503595334b4fb" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "528f48bf51e413f1c579adab9f58e090" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f0aeab1e7b494ebdc237ba206929f4fa" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e3100f12009b3056eeef235016da3651" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6898917e74104dbe684c67e96f436de1" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "df1c97c037446201070542cd7bf0dd75" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 29908992, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.78.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24629248 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24645632 + }, + { + "name": "model.layers.79.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24666112 + } + ], + "md5sum": "2701e563d4f3421549622d6ae174fd73" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 4194304, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + } + ], + "md5sum": "a4aec8de89bbced04d9df154fc9d1ba5" + } + ] +} \ No newline at end of file