{ "metadata": { "ParamSize": 269, "ParamBytes": 2067144704.0, "BitsPerParam": 9.003102948079416 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 311164928, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 311164928, "byteOffset": 0 } ], "md5sum": "9fd7aeeb1bbd80df281032f8a6453436" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 311164928, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 311164928, "byteOffset": 0 } ], "md5sum": "8d4916c17f623990150071b3c1087e8c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 19447808, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 19447808, "byteOffset": 0 } ], "md5sum": "73ad34503189367643327b0e5cd665a1" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "8231111064935d193fafdb1d8097f7d0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32854016, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 19447808, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19447808 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 19451904 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 30724096 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 31428608 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 32837632 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32841728 } ], "md5sum": "d7b58d27d16518bf9dfd1bc16e6a97f6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "590b52e6c39bb5a9cb77779c0c954cae" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "c899cb4c4948e81649c3d81e64641931" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "52d06a984d511c203e1ca464d38fefdf" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "86a5dab53488833f6734147d6f87184c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7320f9a23e7c25f040de925004b1878b" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "8c4bf3a39137cf0f5af2ed467c05ef1b" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ecb4fa6268f906bb02e3882c121d876a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "cf4536351c03db3982236e48a0379e39" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c4fa255b4189ef8672b0294d38e4a24f" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "a0ebe921dbb1ac894b7b48d51055043f" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "376daf4db4a00f9088582f22e063291b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "5f4ab6c00994dae3f5c863d4a27de06e" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "794b77cc14531b07b5de1daf330d81a7" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "b02dc443de5cacfd87ac728f71a5d113" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2ee1fb7989a2b523cd878fdd605211f3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "998c9c82ae278c936fa9b98172d10edb" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "de13765b1e31342906e2f4e0b02c81f0" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "be8cc00a2a061877c46d8d33b15007c7" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4b4f17eb683c36c53a71a24dc342bc36" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "bfe84819102f516ecefe13317b7a868b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1c8f3e7b681d47c5afcfbfa492a82b0d" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "fbb336c1e963fb8224ad0cce6f6c9eb8" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a3a2e17521e9939915f3585e13d816ad" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "63d9b83b4cdcdbfe4e8423cf3a052591" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "885875e67aba433587f42bce09f5e7d9" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "fb540774e15a07e7ff1e737b6e1e6ece" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "cffe15773593df15c2329d29d3634f04" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "c653d849393b27e72ccae83159f17bf0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3cc58cd5ed682131541d522c2a66309c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "9bbbc6b06d53f73b61d6d8adfd9ec63e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "a7c8bb315b8ced1aee9ee37a6fca13c5" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "e1d6b5dd3b7f741ce57bf49a41b91ff7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2e7a8b1fd8c4d904377bd56476e9284a" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "d7ae246c1a4994e5c651f1f9a7c8bd89" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "523a8636cbbef697fdb075efe0bd2450" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "d27b8ca902a958a7a40b04c65e04b6f2" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "93f4f1035b6c28ad578ee076b4c8aa96" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "6d734f64c8eb4ac237e3a9484ffac89d" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "84b2eb0f2d5376b493c6979fc1372e08" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "bd91fa4a4cda4c7cccdd3146131872b0" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "339cc4ec4f72d11502b4797f7f70cb6c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "07b4af925b95f11fdd782425d19382c7" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6a761272ae46d4daee09d5bf115ed0dd" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "0515ebda56dc78c731b0aa1399845bab" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 11008, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f3445c55433689e2b2f12b91b703302c" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 31232000, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2048, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11272192, "byteOffset": 17829888 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2048, 172 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 704512, "byteOffset": 29102080 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 11008, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1409024, "byteOffset": 29806592 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31215616 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31219712 } ], "md5sum": "8e2d34afc5c6986b2033426b47594619" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 17829888, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 6144, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 13369344 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 17563648 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 17825792 } ], "md5sum": "91964c6c5e5a4e7ac1c2ba0237933f7a" } ] }