{ "metadata": { "ParamSize": 195, "ParamBytes": 7642159104.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "lm_head.weight", "shape": [ 32064, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "48a04baa06c8140d1ae4fd61ea2a51b1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.21.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2065d9f788567547a774f5ffb9f1131e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.21.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cb9728ad4fa6db615b1caad12129df66" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.21.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "93eb83c12f0c5e8a7c3108b394c91f84" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.22.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "608636bdc84e74e9c06edc92b184ad51" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.22.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "016c70a051cf51d54861397f6e389ba0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.22.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "6fda1306b81ea8a098567dc6d73b273a" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.23.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9fbb3dbb48f84112a1cfdd1a3dfc1309" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.23.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fe08a9496e9398f79dae924ceb0067eb" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.23.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "50bd13a3e1b4f4f677030aa126d58003" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.23.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a4bda16fe3cd9cc9a45a679920864915" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.24.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a7c2b8f5bc96ef8751dace0f4943eadd" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.24.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ebc48a19827d58234b2c8bf0681a127a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.24.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4b73dc6122a84c6529a5f0ba77b6f9df" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.24.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "8067191930fe75a1d628d5cd61a0bd37" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.25.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4600584c01f8b59b15bbb39157f83ea9" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.25.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dee6947e469b73541a20200c4dfafc60" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.25.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "48e123914a2e6b87cf0f84b551c39274" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.25.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "c97246a9dc9042ec19934b2cf8b30594" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.26.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cef6f7e1c2aef1f38f38ba9f0cef500d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.26.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "491dd3fab5b55ea5b79732be5ac73ad9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.26.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0cf916f8ba3a5b21acb9979546b353c8" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.26.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a09a3bb9780b39448747ff97bfb01f77" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.27.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bc70e157266b370a65b2260fb956ef05" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.27.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e3049c1414aa0312b11252bc0df8e012" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.27.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a3cca75a3b6fab52c9e6d2925ed8c740" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.27.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a8a572beb8324e103d72a1c492b447c8" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.28.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "838f4f01a65960266efa7b3dcfcd6f62" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.28.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "98d5793d5b479999176cc86adf256624" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.28.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "54d4c13f9f91500f02e4e550a952fa5a" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.28.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "07cb6d2348769db041143c3e0774ecab" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.29.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b292bd2096b63ce0b3a69d8017346d07" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.29.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "710dfac38663642c90b81a90955e53f0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.29.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "184397f1a9cfa93527bd0b47bfc0f4d9" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.29.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "c0b0cdb47135abd8be76c8cdf519687b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.30.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "26b1c36720773b1715560a3380cfe5e8" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.30.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "65ab4c77f8d88808327b115184c44056" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.30.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0a969f022b81189c9675ca624838bb7b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.30.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "1f1118702f5f96cb08d2eadef0cd73a5" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.31.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f6391b80aa186aca2df8d816b3d5d20b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.31.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cc354c852575d11406c100e4ecb6a360" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.31.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2f75fd301b667261b7b5cd83e306c61f" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.31.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "e0774dd3e2453472fe59ba3797255c73" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "transformer.embd.weight", "shape": [ 32064, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "2794efed4f90da2679835636c49453c1" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.0.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "aca7213bce912376770af1f9a7805546" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.0.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7d45b5a1d117d0fb2f97c2566b85c080" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.0.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b29c48242ab28d7751664fd917cb53d9" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.0.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "43ffb7ce1c9bb16f42f8cd92533105ac" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.1.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fba6c01b38085811a0b169a689e9a8b0" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.1.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "754739f276a532078fb8adf8a358a84f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.1.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b08e5972a3ee4dc0a5d2446c74b78667" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.1.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "6a326d18c9a02a0ff844de3209d9d239" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.10.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5559680d21364578cee8b4f3b884c387" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.10.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b5a2560d04db862c4b0b44d480b3de11" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.10.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d7b4d5eb54d90c8b88cfcdcb7ab7a6d3" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.10.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "a2f33fa311c565fc0ad4b0b2ac9c24cf" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.11.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2ea00513628cede454214266723b73fe" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.11.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6f031b6a01f57cb949b87939fe3a6fcf" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.11.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7629386137cde4883fe6092961628908" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.11.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "74f94e083aa4730d2e928a652bb93aff" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.12.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fe1dcf39db8e8c03468b5714d7574f1d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.12.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c55494e7965fcddc67886f2719fa4f28" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.12.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "01823150ed94ebda1ac03ff514674d59" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.12.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f7606df9f1f8a7ae529f3c3d84812cfe" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.13.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5efd0889d320559ec7d2587d88560310" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.13.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fde3986982d33d33d6f9033954c4f76b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.13.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2d6c2ac2c20271403136de2b690e2c10" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.13.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "f3e6c1bc7e706a4ca274baf2bb296a3e" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.14.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a6206e415bbb24b99496af3833bde153" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.14.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ce3c652a54a9d59b42dfd107c214fedb" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.14.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "08432e0cda4f3d4efb5af8226d292545" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.14.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "4da8e23a88baea7d245115482c783b88" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.15.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "61fe1b44bf673ca96c6f5febd6dacaaa" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.15.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d03015371c3aab576d7c554299a64a19" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.15.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ba6b9594289c02e4bd8405905e3b9345" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.15.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "30b9eab0df51695103e0d2c8281853d2" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.16.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a87a68606ce19491b283ebfcb4f261e1" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.16.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3bb222858ea27693a823cc6e86b89394" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.16.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "aafaa8e5e6341a8b186315f30b33eeaf" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.16.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "c0ec806ad19c5f93174e13e77343e9d1" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.17.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fa41b02a47344ce12ce2462ba41cddae" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.17.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8a5537e9896e145836ebf99dbb1d617b" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.17.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "56aff7c581b6f3aa14835e0cf733a113" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.17.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "bcb98fa15aca763650cfb90aad95ff8a" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.18.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3d3b925b996f918f06902cb77831e82b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.18.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "006e594d0ec96f0a2b3824eeabc4405b" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.18.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "73d5f7100db12528051f7f19aede3d60" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.18.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "9f9e0dad8db22348008c009241057bba" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.19.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "64cba4f187a6c45a05dc06853132d7c4" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.19.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a10d189b10ace93718ab187f3f78b402" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.19.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6a8801a5037374d1317431d8ed723218" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.19.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "329a57ff77416e0507f6fe9e662681e7" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.2.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "40578ab77b8ea1571d74b9a25d46d821" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.2.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e88194e0aed044702686ba4edd832d5d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.2.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "baca8426aae056bc7a3ed32f5f7fa3b1" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.2.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "43077c2fdc9511c9bc7750d89ad53321" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.20.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "16c500aee75a3f6fadfb79388040ce59" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.20.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "35e566f071950f559d9a68ed651cdc0e" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.20.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bc4c11294dcef27da24c0d0c41cddb81" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.20.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "6c09843d6224c0c491c11052703711d4" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.21.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5301ec982f0f0bee7cbfd66c5f10db1e" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.3.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ad69d774f1ef8d178b241ed77788034e" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.3.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "71b49e1b1e72c73d95a73a80ff85cf56" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.3.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0a14557ee27469ea0c91accf21f1882a" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.3.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "1f612196539cf259ff0ba018a1d3fdd8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.4.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "125b30cce73232acdd57017b751b8ec0" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.4.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "174b431f56124195a88de449325221d5" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.4.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f47846816d3009156669ef65dd815b9c" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.4.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "fc5eab9b081abe8d88f8713d7340a33e" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.5.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "57feb42a7dad4058566e19deebdacfdf" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.5.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "122ada6175d0fb97ac9bd9c1c28f4a83" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.5.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4c27a252315d601d17df5b45c4ed394b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.5.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "73184c595beddb967a0a1fb4adb8a697" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.6.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e4e3eed244b4ffe0542a0e6e950d5c23" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.6.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4f529e42dabff29fb4432f509073c9e0" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.6.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "48c4aca8ffbb252d17c5f5effb09a532" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.6.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "8a0f2cc6cd07a2208027ced9f52c3ad0" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.7.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9657e1f37ef7a3a950ff28e1b1c8d288" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.7.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6c071534349e4c2b500aa75b5f207769" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.7.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8e49ccfc80aa0ffe5526847f8c955f83" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.7.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "db54d2b3c23b16af51bc756ecf127307" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.8.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f9ab4d31e88e01d5d03b3cc9de36a71c" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.8.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "13f0af026f446e192ec681cae51413d4" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.8.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "084906da91b4132050bf217661c40b19" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.8.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "e5c8f3b8d7bc024599ea7c12e86e12b6" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "transformer.h.9.mlp.down_proj.weight", "shape": [ 3072, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "eaad0fe95f377541e343e2b65c526166" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "transformer.h.9.mlp.gate_up_proj.weight", "shape": [ 16384, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f32f91ad037860d6cd65911faac9277c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.9.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "442e739c1bd8c0ad4a7528a629d4f556" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 56623104, "records": [ { "name": "transformer.h.9.mixer.qkv_proj.weight", "shape": [ 9216, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 56623104, "byteOffset": 0 } ], "md5sum": "cba65d66e4a5aa25c15a78e798ac7d5f" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 19273728, "records": [ { "name": "transformer.h.21.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 0 }, { "name": "transformer.h.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6144 }, { "name": "transformer.h.22.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12288 }, { "name": "transformer.h.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18432 }, { "name": "transformer.h.22.mixer.out_proj.weight", "shape": [ 3072, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 24576 }, { "name": "transformer.h.23.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18898944 }, { "name": "transformer.h.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18905088 }, { "name": "transformer.h.24.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18911232 }, { "name": "transformer.h.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18917376 }, { "name": "transformer.h.25.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18923520 }, { "name": "transformer.h.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18929664 }, { "name": "transformer.h.26.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18935808 }, { "name": "transformer.h.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18941952 }, { "name": "transformer.h.27.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18948096 }, { "name": "transformer.h.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18954240 }, { "name": "transformer.h.28.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18960384 }, { "name": "transformer.h.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18966528 }, { "name": "transformer.h.29.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18972672 }, { "name": "transformer.h.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18978816 }, { "name": "transformer.h.30.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18984960 }, { "name": "transformer.h.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18991104 }, { "name": "transformer.h.31.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18997248 }, { "name": "transformer.h.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19003392 }, { "name": "transformer.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19009536 }, { "name": "transformer.h.0.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19015680 }, { "name": "transformer.h.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19021824 }, { "name": "transformer.h.1.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19027968 }, { "name": "transformer.h.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19034112 }, { "name": "transformer.h.10.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19040256 }, { "name": "transformer.h.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19046400 }, { "name": "transformer.h.11.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19052544 }, { "name": "transformer.h.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19058688 }, { "name": "transformer.h.12.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19064832 }, { "name": "transformer.h.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19070976 }, { "name": "transformer.h.13.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19077120 }, { "name": "transformer.h.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19083264 }, { "name": "transformer.h.14.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19089408 }, { "name": "transformer.h.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19095552 }, { "name": "transformer.h.15.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19101696 }, { "name": "transformer.h.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19107840 }, { "name": "transformer.h.16.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19113984 }, { "name": "transformer.h.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19120128 }, { "name": "transformer.h.17.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19126272 }, { "name": "transformer.h.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19132416 }, { "name": "transformer.h.18.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19138560 }, { "name": "transformer.h.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19144704 }, { "name": "transformer.h.19.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19150848 }, { "name": "transformer.h.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19156992 }, { "name": "transformer.h.2.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19163136 }, { "name": "transformer.h.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19169280 }, { "name": "transformer.h.20.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19175424 }, { "name": "transformer.h.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19181568 }, { "name": "transformer.h.3.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19187712 }, { "name": "transformer.h.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19193856 }, { "name": "transformer.h.4.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19200000 }, { "name": "transformer.h.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19206144 }, { "name": "transformer.h.5.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19212288 }, { "name": "transformer.h.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19218432 }, { "name": "transformer.h.6.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19224576 }, { "name": "transformer.h.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19230720 }, { "name": "transformer.h.7.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19236864 }, { "name": "transformer.h.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19243008 }, { "name": "transformer.h.8.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19249152 }, { "name": "transformer.h.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19255296 }, { "name": "transformer.h.9.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19261440 }, { "name": "transformer.h.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19267584 } ], "md5sum": "f066d7c15dbf7a42f65846ff50a0ae06" } ] }