numen-tech's picture
Add weights
60bff83
{
"metadata": {
"ParamSize": 313,
"ParamBytes": 3927297024.0,
"BitsPerParam": 4.12551973205239
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
448,
152064
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "bf2d0126ac3d6fd2240a63c91332076e"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 272498688,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
448
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 272498688,
"byteOffset": 0
}
],
"md5sum": "98298f69f233ef0a8132bff7c7bcc314"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "60cd0b673d66b6e77c5d13c8f0d26e11"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "c6a5d07a8b3893d73b7016794550854e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 28752896,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
28,
152064
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8515584,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
28
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8515584,
"byteOffset": 8515584
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 17031168
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 17038336
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 18099200
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 20220928
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 20228096
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 20237312
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 28494848
}
],
"md5sum": "b38c0cffe86ae0d71ef4430d32b7b4be"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "5abfb30ea8787e0fde3732267df5e2cb"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "b6572e8f6875cf4afa02a3d5e08cdf90"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "12e40deaa8465e954fd6ab7862ce537b"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "ab8814fa6b8686a3d27af26010a45953"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 28174336,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 6422528
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6623232
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 6630400
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 7691264
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9812992
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 9820160
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 9829376
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 18086912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 18344960
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 24767488
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 24968192
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 24975360
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 26036224
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28157952
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 28165120
}
],
"md5sum": "178c6a3fb33e38bca5fc9ad3ce5fa190"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "8a85c22cd72e50ee2bdb90d2c226ff06"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "b297e8964492bcaa7e65016d8bd398d4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "7c2b54c844b14e22001f9fde0da47e93"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33490944,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 8257536
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8515584
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 14938112
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 15138816
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 15145984
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 16206848
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18328576
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 18335744
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 18344960
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 26602496
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 26860544
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 33283072
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33483776
}
],
"md5sum": "398fa91e1e3d7560b7243dfd298cf14c"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2a96e5329d825ee8c18bfd92e4c7bbb6"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "e8892b8d9af664083d01e0dda68b5e75"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "cb963b71d8fafc1cbd7ff831114f6826"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 30059520,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 1060864
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3182592
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3189760
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 3198976
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 11456512
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 11714560
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 18137088
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18337792
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 18344960
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 19405824
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 21527552
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 21534720
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 21543936
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 29801472
}
],
"md5sum": "d5310eb04468403e0f394aeb0d22effd"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "221fbaf9f803fcc112300c55cca919f5"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "9aadfbbdf470fac93fd4acefa8912d25"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "b2a4094a1ad76a266c8088899d504b51"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "494f1690e02b807cbcd5b844b74c453e"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 28174336,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 6422528
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6623232
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 6630400
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 7691264
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9812992
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 9820160
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 9829376
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 18086912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 18344960
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 24767488
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 24968192
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 24975360
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 26036224
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28157952
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 28165120
}
],
"md5sum": "06521204547273d11bad199c4ac1c04e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "7c65dd61774c239a2f54eb82d44e3eb5"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "3791891a3cdd39eff2741c0a171e8ddd"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33492992,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 8257536
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8515584
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 14938112
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 15138816
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 15148032
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 23405568
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 23663616
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 30086144
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 30286848
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 30294016
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 31354880
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33476608
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33483776
}
],
"md5sum": "f6cf8ff37c0d9142c3ace7c631aced1e"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6425132f8bac243bc3e625277c03b9a3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "f871efa481e28e9d961f4b7af33c99d0"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "70786ddfd64ec6e77e5cf66d6a59a86c"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33490944,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 8257536
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8515584
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 14938112
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 15138816
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 15145984
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 16206848
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18328576
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 18335744
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 18344960
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 26602496
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 26860544
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 33283072
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33483776
}
],
"md5sum": "60f373ada0016aefab28f9a3c4d53f65"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "c1e72658dc59ab8cbbc850cc42caf564"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "620387988a5e4af9db5193a7769b3eae"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "afd4d37ec6ab5af8e28fb5a2a19a8566"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 30059520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 1060864
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3182592
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3189760
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 3198976
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 11456512
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 11714560
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 18137088
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18337792
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 18344960
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 19405824
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 21527552
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 21534720
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 21543936
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 29801472
}
],
"md5sum": "cdcbeee880af6bc8590049aa86672b72"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "86ec7422bff27fa1e781cbdd28f76f4f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "76e3eb6f6e0b854165b4d7bb441c2242"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "97d1999e25ffbccd2bde6fd2d7e2f7e7"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "a3d436036f680001fde853cfee2c5601"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 28174336,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 6422528
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6623232
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 6630400
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 7691264
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9812992
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 9820160
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 9829376
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 18086912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 18344960
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 24767488
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 24968192
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 24975360
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 26036224
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28157952
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 28165120
}
],
"md5sum": "0b9ea7b34221b7de9d440002de7fd93b"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "b9055e0839430e5affd67d66687ff593"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e4b5fb8d74dff5535431286d25b778f2"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "368ea3dd119bc018d4b68125ac698ed1"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33490944,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 8257536
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8515584
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 14938112
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 15138816
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 15145984
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 16206848
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18328576
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 18335744
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 18344960
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 26602496
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 26860544
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 33283072
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33483776
}
],
"md5sum": "ed8cbdc2fb1e391ccece277b2bdb9e6e"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "2f7b3c632ce97ddd9c051766f059ec8d"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "7c8689e94884df6c7789cd72315bf5d6"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 28984320,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 1060864
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3182592
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3189760
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 3198976
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 11456512
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 11714560
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 18137088
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 18337792
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 20459520
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 20468736
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 28726272
}
],
"md5sum": "65cfd3b2cf4de9b1f32028a5a15739bf"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "965d0c6ef79ad5fed1b3c77e7c8f030b"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "a9ade532c03a8dc110094d961463300d"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "0240842238f4af9131cce218a590ea32"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "68a24d294b8fd76e8e1570a2753fa630"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "34efd518bb1b1660c0958e866587911a"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "139704a6153f58ec81497476b2ba4157"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "03a402bcc479631bac3dc1d00aaef04d"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 32446464,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 6422528
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6623232
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 6630400
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 7691264
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9812992
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9820160
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 9827328
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 10888192
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 13009920
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 13017088
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 13026304
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 21283840
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 21541888
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 27964416
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28165120
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 28172288
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29233152
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 29240320
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 29247488
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 30308352
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 32430080
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 32437248
}
],
"md5sum": "887785b438a5acee9b729a810ef9d04b"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "4bc5e9bba29446754ec79022f77e11af"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "31ed438bc1fed28048dfced8d530c2d7"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "6674ed95ac88037371acf18f441f7f9c"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33490944,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 8257536
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8515584
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 14938112
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 15138816
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 15145984
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 16206848
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18328576
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 18335744
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 18344960
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 26602496
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 26860544
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 33283072
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33483776
}
],
"md5sum": "7102c8b709d549a6ddbe2d3c3358676e"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "cfbc0bf2d94f47e2bbd948668b4a6db1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "371aedc5fe9d7542df3647b489785823"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "59f5a63196ab7e1b2b3bf356ce58cf58"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 30059520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 1060864
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3182592
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3189760
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 3198976
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 11456512
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 11714560
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 18137088
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18337792
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 18344960
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 19405824
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 21527552
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 21534720
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 21543936
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 29801472
}
],
"md5sum": "7092a8fbcb33c29e4d08d2a7a058c156"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "f9fcd685bff01f6d50e146a46b481b56"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "e985959af627f755b2f4888b37457e13"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "b14d38b1d7a958b9e060444333e9b3b0"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "bbf59f0271030b91bd28d7ff9480e9fa"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 28174336,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 6422528
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6623232
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 6630400
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 7691264
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 9812992
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 9820160
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 9829376
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 18086912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 18344960
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 24767488
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 24968192
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 24975360
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 26036224
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 28157952
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 28165120
}
],
"md5sum": "4ed8b617f3992544577e8933ffd77314"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "717f3478f7442da0f9b368eb85295586"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "1c4b3639920360d84fed03051a0de437"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "c939a48045a78236c11b8ca08e6090ad"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 33490944,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 8257536
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 8515584
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 14938112
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 15138816
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 15145984
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 16206848
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18328576
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 18335744
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 18344960
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 26602496
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 26860544
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 33283072
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33483776
}
],
"md5sum": "a7d88373af20f2ba28802a32b136aa7c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "32a3ea6342193c1b3e99a35c029e9e77"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33947648,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
2368,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33947648,
"byteOffset": 0
}
],
"md5sum": "d8f76832efadf065c5fce9f621524f9e"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 67895296,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
448,
37888
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 67895296,
"byteOffset": 0
}
],
"md5sum": "0bb50127725aacd5b1da73438cddcfa5"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30059520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 1060864
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 3182592
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 3189760
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 3198976
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 11456512
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 11714560
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 18137088
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 18337792
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
148,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1060864,
"byteOffset": 18344960
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28,
37888
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2121728,
"byteOffset": 19405824
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 21527552
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 21534720
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
448,
4608
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8257536,
"byteOffset": 21543936
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
28,
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 258048,
"byteOffset": 29801472
}
],
"md5sum": "c04195cfd50f29d7a06069e3c6d11ea5"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 6630400,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
448,
3584
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6422528,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
28,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 200704,
"byteOffset": 6422528
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 6623232
}
],
"md5sum": "2f47b0e16edbc7fba2256ad35d55dd99"
}
]
}