{ "metadata": { "ParamSize": 198, "ParamBytes": 3087428608.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 466747392, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 151936, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 466747392, "byteOffset": 0 } ], "md5sum": "9c394790c22960ff045f6cecf24f2478" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "2b2470b4a9e72adb478d1631f7405cf1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 27535360, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 3072 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 27528192 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27531264 } ], "md5sum": "dc9e08ac4db31734a1b603ee26599517" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "49563c8f4aa9a7eb793010398bfda240" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "51521a14a5183f53e7597fb1b0c1ac02" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "3c9458be61e6f6d2f09bba659866fdc5" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "e09a65b0e17d3503c7fc7a1ca671a5ba" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "672289b109ad0020f17e9f6844d72eb0" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "1468ccdaed4ba067e4867957c993c049" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "386225f7f70fddb6a9e43795ad3a3202" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "2f0ee2ee3560e76e2906ba68bed46ccd" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "af10359a4a55734f11c85bd7abd23af3" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "bd931a02a4417507ced3902718fd7d78" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "a886e033b34ef4492524060ada67c5f3" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "fdf7135c7f050510e918a6da2eb747e4" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "19b7ab0aedab2c98ef84d1bb74677cec" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "40123c949516b96ac14030b58f97c970" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "ba52e42aa6e9bfddb33bc6be2cbbed6c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "ef3a674229205ddd58f4e2a2df97af1b" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "4f3105a964d9957e5b54bce793846d4d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "5e1387ab5bc412368449752b6370e568" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "cb2e93a9ceaf3233fbd74948a6c249d9" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "8a80c045d2edddb7ef0573f7c564c519" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "2d0661bc87c9fd3e7da9a0d13349380a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "e6d23b39b8cb567cf0a4a7e6714cd78d" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "b4df51c707ad996798fd88cca7d4f82d" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "00a26156008979c2af45a95ec82a3a0d" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "4d45780eb98ca483a06df9ab8adbe7e2" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "75221e62142c91edd2dc091cec30665f" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "0fbd4abacd6750357450eaac4c8cfba8" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "6fb6758ef41b318d5f5bc1c009f643ac" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "dd93ed00fe037887023cc5e040d40186" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "d36d64f0ffa4c128414487032a334a9f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "8ae17a585657e430228933f94477a4bd" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "b8788042f9ad0c7915afadbe04e31930" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "353c3896a5d202428c9620719a05bca6" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "c0f5cd315d9f707d055fc2505442cb4e" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "2a5ef0f2584bde186b674af63e6e7b75" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "e0cc850be792ebbec970e7ff6f4856ef" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "36341c148fa89452c08e764f461d6298" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "21773a84fcb6c6e742babfba3cdb8359" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "e671efec0402a6cee8977fc5c58e507c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "5bb0260d69401b96e8c6bdebdfca36be" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "59128784d429bdd191282b7d6442addc" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "2e3ff83df81c41b602349fa77b1f52b4" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "0d198691fa3d0f8a8f048203a35098c6" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "ae27a680d3d40b1fa8619c493bc41372" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "a3da97ef93ff7b3a9f75fe3c61966a07" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "9dbf820a69c2940892a3ba78eaa3ac17" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "923a91f17052a3d0f91efd819f28a9e3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "b2e04d25a01a6e878053e3929c326e5c" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "e3772e7e14ef6fa64fc1d690dc042bb6" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "889d5ae8be3b2a52a14a806a87d91a51" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "d9e5436000200181c6bb168b3a57895b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "d40f1fddbc729b64c88e261d6b556b48" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "3c664af5bf4dc2b72824b91dc57e191b" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "5bc317302d4897884b55ec6f42427685" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "add3b34b5d3ff066fc2f581f2a8e6d80" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "ee80628c2a2a049c273a1e9b0b1b7ddb" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "afd49cfb9ea9a046505f1c2b166d4e4a" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "9390f2ec375eb8c6156ec82543434e6b" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "eaf99bc611ac6669988c0cf21a32eba0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "c910c61629f83e45715ca3cc84de034e" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 27525120, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 1536, 8960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 27525120, "byteOffset": 0 } ], "md5sum": "8f0903d34e62052c91a180e686ffa258" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 55050240, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 17920, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 55050240, "byteOffset": 0 } ], "md5sum": "be500b3df22deade101f844744892dc1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33060864, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11013120 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11016192 }, { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 11020288 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17311744 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22030336 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 22033408 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22036480 }, { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 22040576 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28332032 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33050624 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 33053696 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 33056768 } ], "md5sum": "8df7c8c8dc860c0e0a4670128a173b70" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 11013120, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 2048, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 1536, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6291456 }, { "name": "model.norm.weight", "shape": [ 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 11010048 } ], "md5sum": "db8b6ec54d311d54bf38489a6a162580" } ] }