WizardLM-2-7B-q4f16_1-MLC / ndarray-cache.json
KLMFOREVER's picture
Upload 2 files
742258f verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4073857024.0,
"BitsPerParam": 4.50042279387851
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "ea92bd10956c71bd3bfa0bf5e70c00bd"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "83c9d51c0fd11cb87771f0103d72148e"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "30afbf8029683c4d3f10a3aa0164ab18"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2751cd7d8b7b5cf219a364edc59bc641"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9df53027ee2f12596c3b8ca9a23d2b82"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 30244864,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192000
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 8200192
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11870208
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 19210240
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 19218432
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 19226624
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 22896640
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30236672
}
],
"md5sum": "b3ccf48e7255b4e87408740b7eca0181"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "943157769be74cd31dc21eeb9e00ae74"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2e87263ba13873ae270e3d09e5051845"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "b8a67d7c2b5460e7394a90dbf9ff65fd"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ca83dc790042097a6443b70cc6d36d86"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "0d5bb5109af241e8be79c3185f2b9da4"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e1f3eedc50e7fe647c96065b8c2aedff"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "d2c74b197802c3018ec9194f520bcab5"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "290d9ccac13bfee25a56bb237c1f6c4d"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cf2598461628529f2a1567d5645246ec"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "7ae6a3f62f29148b4b7fdabbfd8402c8"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5a59d8157a2ea31d93d91444be14a9e4"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d51bf7893aa61e19c7ca98aa6e0c15bc"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "ed9ff4a16887e35c2635970099e13e67"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6e7f9be0aae4f57dab35c31eab247f4e"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c335cf723542d04be693a064096596e8"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "ca3dfcf43a1ff82f16d3563279c62d63"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "eb256278a18cbbf38b4549228aeded1f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "1732eb8d599cd9432df58f4f01591f7e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "add277ce8f52148019a1966aa21da22c"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "b84140dbee12ab9a4025ed13b074fb78"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "30a231f6745e1fa8bd7052c59b261fbe"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dc97f65f12654aaa9d14da919ddc971e"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "c2983880e0c3750534244de66fc4c313"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3b81d35d58d9c6d19b79d1210285ae75"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2878d888c43b2b03d14745893c551ba7"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "f81c66d8e3b5d6f6be7b82af6a3b5d88"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "0b43f474b1ac7253da4e83542ce70085"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6c5a422b1e3437dd22961df748d4654a"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31801344,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 23601152
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 31793152
}
],
"md5sum": "834d1aa77c78ab427e9051bba42ecf72"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "376ab526a72741e618d0bc326175dcb4"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "d71a31444d05b5aaf1cc3df42b3ad7e6"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ba48532ad54361eed32348bfdd37429b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ed1141a27d45ff97a3745f1eee27c8b2"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "cb2fd50124875e948ae93ab35b94a897"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "96da1c8961272169828b0c60f4b94e3b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 32505856,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 11010048
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 18350080
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 30932992
}
],
"md5sum": "a0d2a948c69256e40c41d9911e10d112"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ca5bc5554616167c47b672c2fce80ed1"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "adbf70d3843bf1680898292098c8f165"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "9d0e4ebc5db83cf3d88c177a95e25e6a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e50c3c67615dd4decac87b17f33c49b6"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "265caa7af2ed96fb7b5569ecf05436ea"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "b9c10e43deab62466b52fec079b18f9e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8c42f7705d7dab8b7e99058242aac2db"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ec0792982fdbaaaa1a5041d5266c574b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "6a96ea5e30edc0e40e6ffd7eb28c617c"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "90d5c7c491d1b0be2e5959aff1916535"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "5fd45c768302533b5675c3f42b7fa8bc"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6e48408e996d493e6d2f905dfa277d11"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "0a1981c123a234306101ea543b2794ee"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ef5a21c9e4e8fc741ee465fb54790197"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "17d6de88247ff8dd52304fa3f4f5d6a8"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "8b339d79c9547afeda4f297f98b19aac"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2027d080057ed7efaea2a0f5fc5d643f"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "658d8a89a33a55d3f17dfeae2329408d"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "39083ab8b09b0648645b534ec7fc9e36"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c1b4eebef4ffe8e4c1a5fa11704bf5e"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e0c4541c03fe4450fb400e40b3183eef"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "86f5cc627fd8cb722b46489e6548f073"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2e095f2a47cac37f18807e2411840ad2"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "c3d75c4e72915566c44f1bf4dbf738da"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8a93e613db338710590667b9b04cb01a"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "82dfb289b08873b3a3ced785ad3fa390"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "aeaebdbf01153f2ceec5d463d8c90602"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "01f69f5389da2a98da6b8d122239adf2"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cd13600478955798897fe6c5bbfe43a5"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 24150016,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 13115392
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 13123584
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 13131776
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 16801792
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24141824
}
],
"md5sum": "c41255173d166b5a3bc4280e89ccb556"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "43451d048271d72fb8f752f1f1c89917"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "08845265b7909bf30d92f25020e13ef5"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "a8e30af7945e6357f999f12de2391a0f"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0bf05591981f4752a05004e3ebc44af8"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "86a8cc075f64fda11069a660fdd78c08"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3f00a7dc4ed6e685da1d7c019051f97d"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "25d6f8d59339c2b469c44321ff70f393"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8ae5a58d9c577e1d40bcefb844f0972a"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "49c878b03f131355da37a3a06af8b5b2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "e390e7d75990e5adf3c9c0b420dffc35"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c0ccc9436147f07aa7ed31dbc64e1338"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "392d613841f5ed272611028d51b55b94"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "8265513b17af532e0bc0af88b152bacb"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "11d5f508dfb4bf3bd7784b631b7aeab6"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "407d16ba839620552e10a8254a3b71ac"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "bec3204b7cb6c7dff1160630c2f85135"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6e12dc5c1b29edea475cc2081118c9bb"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "4c45ae0afb4ad84c5877f52c0d447406"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8d5245cbf5feda469bc192c06ef68d35"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "87c5b9cb5070c8da7b5ec94308dddf97"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "698689312dc9b2c18a1ef9a06a4f4015"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3e70871503e5bf2b4a54f031c9a77985"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "84ed5a5ac6431b54ef65fab93a933c97"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "62f8b472cb0ea4f9e5748fd5136b5945"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3dc835a2c209decf9cb2b9290e63d28f"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "9a3d2a9eca033d720524ec63066cb491"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c8f0a904714bf1d51b090453444d054"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d73acad94fd61466e3a9cee44c6c95d6"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "1f921f2b367ae3b8ac84ff3b73636531"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "982b5a574a6a249c63922efd763855b8"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "50e1e7efdc695f3cc5f7bf93a8822d11"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a021eaae1e88b3c3c019aed19ecb73e3"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "3e1f4120d800910e090ac5ae03b8e6b8"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 9437184
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 22020096
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 23592960
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 31981568
}
],
"md5sum": "4c4f82894275ff0b75eded23f2af4dd1"
}
]
}