CodeLlama-34b-hf-q4f32_1-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
fd78872 verified
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 21092663296.0,
"BitsPerParam": 5.000635812792825
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "f83ee6c5a5bca19120ee1cb0c42a66f2"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3fe51b7b4d5b80ed4f8f21e75119a5ac"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "9e19b1cc6922322231b7f3141d4548be"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27705344,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384000,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16384000
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 16400384
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27672576
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27688960
}
],
"md5sum": "42cfbad5cab1e4374acd4ad00272f96c"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8b1fc149bcbde4fcd9ccd1f67adeefe3"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5802cfe91a70f2102381f7705303f287"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "732d0c9ffeeac63c45876a64c12c43d0"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "316ada3623875ae784dd5f1da5719f53"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "7a8b24c2bab286a41d64f13ce4d9d110"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "25673e75796c4963e09bf44ed2194e4f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "39fd6de353ba8baa543e03e227a01e3b"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8fa773aeef8d324a2e4bf58866c39a4e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "4c8c56d70c0c56395cfb1d498bb0f54d"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "62bf77bfb761755fdee0206ffdef0e1b"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4fca63935fc6e4fd9ebee10752a1ae52"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d37ab2388b67d64c3b12f8f36e45292c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b66738a375df0d4a1e01947682d28174"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a38af063396a2d4fd140c4603c2e26c1"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3a10019614ba6245d2e3449e63838475"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "bdf7d642408b430bb7810e3276d4795c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "bea67d0348d3a038d69de693b0a6d7fd"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a9861a06d6726ae82ad7264075affab5"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "33b4dd5eb4a9ec84b89284a3ce4a998d"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4a165d2733bdda5bd8fd5f3a7f06fc3e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8c372975165dd7872f4bbefb78536f75"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0a64f902198f888242b017a8c0bb7efc"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6931f00be3c6abc70a323bcb8fa9b9f4"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "beb6e858367fd031855f83e701c2d25d"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3d9ac322624e4bdc407a64c09b5e0346"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "bbba912af842b75c3345fa033afa4c35"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "287e3283ead59cb14e936d7c660b2f64"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8963fd30aeaeb06745fd5fc54988460d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "ef71f494d1242115a3d262f888178542"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "69fb17667dec9e7c8a4f3b073beab32c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "859fd4ddc96184561ee4ecc1af51b070"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "60987ea2a898c49c4f15bb5791febd7a"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "60a7e5040bea8db3f473e0585b546b65"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "9871082fff630f357ffadbdc971f782e"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0855d18551d5783218b6cc47dd8fba12"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6449d295424e50dbbfd10c69bc10d8ff"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6a1ad3e36f669333bd5c2899cec64a54"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "573729e5baef0153c4878aa111a7b207"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ab30c26970e15a7d447d11dc839f2902"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32931840,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384000,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16384000
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 16400384
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27672576
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 27688960
}
],
"md5sum": "9a9cad2079b8414bf9864b41c2366ec3"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fc18bdbd2893f4ada47523b902782e6a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "746beeba2d25ddc1b1236a6cfdf4c138"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2d940b1c7d28aeb4f83c57dc89749598"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "db186dd87c082f4bd2333c4ad6b55278"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5f18075efcbc4c91a85a9e5ff3145b45"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b1ed935a5656a63e4d535d6833756744"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 24952832,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 4210688
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 15482880
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 15499264
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 20742144
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 24936448
}
],
"md5sum": "ee681c69f3e55ed9033b4f541e92bdf5"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4c49934074069c62364ca1628686d570"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "941770f39527a7a7aecbde52a23606b3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a0ef492ad1e3e7ed9f66135ece5a58a4"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "45e8c76f038aef2bdd22f27d4e97c43a"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "68315cb822e9f00587a5a8563c8dd37e"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d31543a978b804b37d0b77db15b4f1af"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1d7342b30a0179f7aed7cc76c32ee8d6"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "430ce7ee38d090514aa40ff2e49bc7f5"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "b210353a82a55614fc818e9a633da90b"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "62912e1242665322412605a7bfe7e669"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dd81f234b52f198ca9b7c754520242b8"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4b704d0a572300deffc186766745cf91"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8afd540ae32c312c402914cd33248791"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a1321e30806a952dc3881d3e5598a4e4"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6853f5a0eb521f2571c43b8fd0f6c944"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a2fdccf448237ed1cdfb1a8920c14583"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "af2817bc3d52f9d40ece2c421f45c953"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "bfda614dae05eaa052475f5ab0e14109"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "23452a3e5e6fb0712c5e8c3edd6fda57"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "dbe382ce99266a39c6b8b2af79d587d9"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ff459cea45630c94aa5c1506877db29c"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "ed8c2231d12ef847644b0195be018154"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "34e01c33baaeae29e20aad7c33c6464d"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8f908e7a0f40dac3d255e17809251029"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2b9296f4f2fe78a36e8dee04768c6f36"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "366d54f71e8d049152bc1d99ecf96410"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "e23f3eb63d3439f1e6b8b29a8028339a"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "eccd87b19fe85556274f0fe0c2609b24"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "deae3faf7ec625e197948c456dfad31b"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ad270da7641ad7f496cfc516990e3b85"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "739c5afdcaf68ab96628d456185809bd"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d144790da7d3b02621f2e5c85cc89be8"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "923339ba742238272fe85c90a8f17ce0"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "9d0d8cdf1285e48b48785d61658dce47"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "71f217a032c93567d01d7f8ba9760bf2"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "9e6d3fe7429c3a037a69ec32b5ad220d"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "198633f5fe847caea1a2c4c87193ad2d"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "eaa9666e16e3d56d3216afff352bc193"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0949a4bcd3b1350287fc941c4d51f668"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "cb60d5d83d94529815631790d5abf2d5"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e080d49b67f2027a369476b9fa9cd32d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "de5b102f99c627b1a31c3b7b14acb335"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d5521abd6af8eb0e00587e2a5f4babce"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a9412f4784f07abe82b0bf70a1540fed"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d2f88166114d311a7354c831a53c2037"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "dfeaf38d3a5d9d82ed8c2f3a42aef880"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "90fc39ccfa65021d4863b2e109aaeec9"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8ec891bfc629997864d81e825f81157e"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2be5f7e7debdaf2122e23d9d385ad17e"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "dacf0c014db7b8bb1d89fa962317743c"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6294e58ad5038bb7f20ab9802d4461a0"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4b3c7951bccb33f14b4a92270eb4140b"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "ad93480e6860241dce64419922393f20"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6066ff03dbe7b1c104ef4a5de8202335"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "c94c0f9050a75e06abf5c675604392a0"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "abf3e3a13fee14714bfd70973d323913"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f3a5896362f19c9e0df9d8c20481cf2a"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c0fc09ac9ff5174eb104ffefe18ee916"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e03fe1fc55d6d26fbc2f8129b0f30ba9"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e5886fd16238e3c3d94b9accc9315198"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "41bd2875987d94ec71ed64edf851f2af"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "9da1b823f28532c06c34a3a094f7bfe0"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "34d305b423cfc2042c84d993466d753f"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "792a3595fdc8a7da196545a76389807b"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "916724d4392b5e09ececb350ccf4ac57"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "35a64a43d94951601fbde2f5b55ff8b9"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "989dcd8f1970d67d0615bdf0003abd4c"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "3adbca535c0771531ad63d032c369edb"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6a7a77c20a9438bed5ba16d93ba55f9f"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c3eca17389d3bf8adbbb7d6c7b2ce7a5"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "88841c018f9dee2f57d08a0e4d3e0a95"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6f5330ffb926d8656f37d65be4059b14"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "6a06e6d5fb69030ce8367b95d4328f3d"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0ebe708ee2b2c143bf6996fcfa72b045"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d5b18c9dec27597f38aa895b375b1e7e"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b06abb823a3e032d5373a31d217e63d3"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "e736d6af94d7413c9ab6d0fab6f090ea"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "56a735ef80089f147104d8afb361f154"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "941cf9f91c610ba600f10ff83accbc98"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "50d3224855c9dbcd7c2df430665906bb"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "420957d755c5f2ea6dd0793633d8f697"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b1f4b7f237cb04c73900abceb401ea90"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "42ced73bc336098ecfbbcc4ff3d26df7"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1d7cfa7c69d5a93f9c8923072caf1a6c"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "5a355f7a72bebfaac1615f5d4bdd30a3"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "544806b9d3052518eb3c40b9c703470f"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ff415f5b830c4899eff3cbf4c6f70d5e"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c3f7c8de425da4aa98ca61a6c440cc89"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6064f62e935a7118af504dc64143f7f1"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "24dab5a3ae204dc22a07a69da6dd40ac"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a890125843cdefa9d80d32eb24337b35"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "33cfb084508a76ac34164fa126e27ca8"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8b7479c49bc2eff687f30d912b7585c9"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "c81f674ee8a1653f8146fb8ff62dbf27"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d256aad0ad4f5de1584e2878e14a455f"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3bfffa8be4b2c1a987fd2d1a9652c1de"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "fc7a28d3c46e0e194266147ab4823a10"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "39ad4c107d11ea8ed7747335ffb251df"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "80ef9f3db109b7d5c45740f6f8635fb5"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "795cea5d13befea0132dddae3d6d382b"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8f8cde34a41eb8514a783bf6f6686b2a"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a7871410c203884f982e309e076c06aa"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "626932862e634381bef74453cd7e44e4"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "c6fc14442e790be98308ae675ccc26be"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d106e0c367059e5402e0f14338406b25"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "46a50a4fafb77d55beed10aca8f053a4"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5a55489a4530521c770bd061b618b04d"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "fa03d197989b0b8c7116e02987456a7d"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c4f74e6878c2e6b4544f258055471eec"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "44f6d95971523d02b22f89050ac08d1e"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "a0c2224cf571e28cc614d7918b66b4ca"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5702ed0d24ea4ecd4953233787a567bd"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "7f63e7afe36ddd30482679b8774df70f"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3a3d8123c9cf8fb50e6f8168b7208af3"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "9972a0d0dbe2aaf3cb20602aca27e2f8"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "502fa3f2946c4107b5a2b39e42fe3985"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8b5c3d0c05efaf91ad7f97177427305d"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dd220de02e05140025c60565c22eba1c"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "d18f6b186e591a3506fce18aedad6a78"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "831004e927fc4346e0dbc830aa8b8daa"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f0b6c23d7f76887237b4ad78b73013b1"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4f58775340d398d314bf57a2243e0c55"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "74192dc556b9de08a9365152b253d33d"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e693442fe86b86c3dee255d86ce48fbe"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8e8416e0d5b9118bbf338ef81f18aee7"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7604d1537268d1b40089fa399b04ce76"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9517e4fec618f8f6006fca6c0940c4a0"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "df92a4ab2ef66b911b4a4d940bfdcb66"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "871b78af71daeed73eb4e5e0e8adbaad"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2cf508e872a64a7385a4378b7d5c7972"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "ad7f134777a16910b1f421627be51668"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c4145a607787b47c457a0d3f826cf657"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e164a8b814751b7b1bf65801411d8c75"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2c3d3c791b277b9d01eb23eb95efcc7b"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "c0282e1e695ac29b3a434291098f5651"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "6144e60ab2114e9473c980df2e3f3c91"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "20e04775fa97fe18d0f2a7b439176251"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5d4ad291b02fa6557e063c3695e06e59"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "73ae1ff6af685ae88f70867b7d19942f"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f293b5ccaad94ebe53a3fcb717114eae"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "35350118f2f244206817064d23028e2d"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5830764ff9bc026b1b4aaec3ef4878ba"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b80d36a5ed357e2e20c2ab51e2349762"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fe8d116b4ad441255ee1a388339ebe1f"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "eb50a32d476c76b37ca9e4e5bb14571c"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "2d7e4eec62301f934625abb08198763d"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fd0d5ed5e9f8d4959e895156855a6062"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6cab3d06bd6a022a23c2a17cd3956a68"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "baed9824f50d16bac73dd3657eba8142"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "598d2b31dbe55efeb02667d3b929882d"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5b70fed9a37affafe7e854bb3c18475f"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8d5e1e77e3dca02d50144c366bee521d"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11288576
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 11304960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22577152
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 22593536
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 27836416
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "522563b3d5859768ad963ed43c3c655d"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8f71e642a23dd8711d91856907dbe7ad"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "18b0e45d1b38b0fb758af26f28322980"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "692296d4f692b7d169fe064e0489b00e"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9b7989017ecf2afce720810ee7cba87a"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "73bac27f25f4c86e011565b814a3625b"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "527c9466237f55bc0453d8f7fc7069ee"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "bcc2b5c46842dbfa819a89b7d0ef607d"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "455777b6451eaa38e60b83c2f55afa65"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "66eac92b02d76353c4858ee129c57041"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4087c12256b06984a47c1017bb7b416c"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ae40161b95bb323b33439a7da29d20c0"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3aea8609be88a55506774aeb1ed34e1e"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "febbcebe04890a6c98efb88d994795fc"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a3a64adbd8accd85548b613fe94e792b"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1f4a4f0a92dd88896b462b5d4a1e1999"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "177852bc7c200d68b18296f58cdedc6a"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "466bcfb9888c960262c46b7c93d883ec"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0f6903c0e12bac594df876bebd7fdfb0"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "929de909696950144911b9921d80fec4"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "efe60020ef7221505d56a423050f69a1"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "134912b7dc14d8a9f6905e9924cf0022"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1db63a528c0be86eead1a1a35b9a6130"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "45c9988351dcee54e9925e614658935b"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "7491731bc389d9e8681a79e9405dd5f8"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "89571fbea8e53370c6bfed75e04cb83b"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "bc05e86b64a567350875a11159bb6255"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2dda5162c7cc5d50edec9adce3a608e6"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f5ec1d37f5da07f998328ed32ddd4110"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a29d2b7216acc24dca0fd12b94cc6d17"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 31981568,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9437184
}
],
"md5sum": "c98364e4e6ea95b7dbdd1602727c4799"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b65b79a646e9eca50ab30d8a84a02550"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "358be6089c88bea410b11cc8c1d0e70b"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "accf38712a67b3c2b1e46663d294e073"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f292598077d266a7b7130f46fdcd6134"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3b1350b2b26387685a3b014235ac6cee"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f6dad98c0996d40844c3bc80d4b81772"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "d497454cb75c78835dbb9008ab396dea"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "023e6d25e94def96ab784a425768fc10"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "b88e272f883039e8b35300f6fe97ce35"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "15e96bfebbc0c02afab80bc6f7994109"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6290b78a86fd27104c7f2194660e6969"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d27d86d1cf295eebc0504036c8c89202"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8eae2c1c63e1fd5ca8cf172c29195464"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2207ca286047f2f61f539f2771e00970"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "8fcca603f94a08e24821e05dfec04080"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a7a34042c88bbc10190ffb194c87d2ab"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d427f8903fca9b6929f25bfc857ede1f"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "63db146e78e386a54a04401b4f1e7a28"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7d650d8366fd80970836dbe55f13d1da"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d4a9971ee116d3cfcdbb2731c522f904"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "58d57a5d40a1e06b10d8e06155408129"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1479c4ee21e75dfd0379c1b7836123b4"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "085fbf2c615cfbd58889fb81d8b5767f"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "1c5218059bad3831ad65f642befab2eb"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "77b0246d5506ef6856fa3fc18e2f9407"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e1c8a77ee3e945a26e566b5c19462b68"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "bfcf64b35ce3f05f9baa2cb73474a87d"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c51618a193704af9c927b1b5e37dd87e"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e6d22002c82e306544fcd227cd782c8b"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a8d6fc842017ac29cd49219921d53271"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2c95f6e977e6f21df996a9f4cdd3e4f8"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "e85e05bb11c67a499b25257ebe6f822a"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8fd8c638a0699ca082b8368afb7fcefd"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e850d3043e170bb18e3b41854745df74"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b7cfe36734f23ab94ce112ecbde6b3f1"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a1839ef7605ff2bef70b468a1c707c5a"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d9afdfe6098e885f457ba1d891d18d4a"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "48bc5423c443497ad06c91d4305206b1"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a830941bebba5fd6296fcfd3c61a5aec"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a44219a9e64b329825d879b6363f2d62"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 30162944,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 25968640
}
],
"md5sum": "9c50cdcaeea38b369b41e8974b96d9c1"
}
]
}