diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3129 @@ +{ + "metadata": { + "ParamSize": 195, + "ParamBytes": 7642159104.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 32064, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "48a04baa06c8140d1ae4fd61ea2a51b1" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.21.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2065d9f788567547a774f5ffb9f1131e" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cb9728ad4fa6db615b1caad12129df66" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.21.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "93eb83c12f0c5e8a7c3108b394c91f84" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.22.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "608636bdc84e74e9c06edc92b184ad51" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "016c70a051cf51d54861397f6e389ba0" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.22.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "6fda1306b81ea8a098567dc6d73b273a" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.23.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9fbb3dbb48f84112a1cfdd1a3dfc1309" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fe08a9496e9398f79dae924ceb0067eb" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.23.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "50bd13a3e1b4f4f677030aa126d58003" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.23.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "a4bda16fe3cd9cc9a45a679920864915" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.24.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a7c2b8f5bc96ef8751dace0f4943eadd" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ebc48a19827d58234b2c8bf0681a127a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.24.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4b73dc6122a84c6529a5f0ba77b6f9df" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.24.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "8067191930fe75a1d628d5cd61a0bd37" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.25.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4600584c01f8b59b15bbb39157f83ea9" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dee6947e469b73541a20200c4dfafc60" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.25.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "48e123914a2e6b87cf0f84b551c39274" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.25.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "c97246a9dc9042ec19934b2cf8b30594" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.26.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cef6f7e1c2aef1f38f38ba9f0cef500d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "491dd3fab5b55ea5b79732be5ac73ad9" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.26.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0cf916f8ba3a5b21acb9979546b353c8" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.26.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "a09a3bb9780b39448747ff97bfb01f77" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.27.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bc70e157266b370a65b2260fb956ef05" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e3049c1414aa0312b11252bc0df8e012" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.27.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a3cca75a3b6fab52c9e6d2925ed8c740" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.27.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "a8a572beb8324e103d72a1c492b447c8" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.28.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "838f4f01a65960266efa7b3dcfcd6f62" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "98d5793d5b479999176cc86adf256624" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.28.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "54d4c13f9f91500f02e4e550a952fa5a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.28.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "07cb6d2348769db041143c3e0774ecab" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.29.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b292bd2096b63ce0b3a69d8017346d07" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "710dfac38663642c90b81a90955e53f0" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.29.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "184397f1a9cfa93527bd0b47bfc0f4d9" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.29.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "c0b0cdb47135abd8be76c8cdf519687b" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.30.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "26b1c36720773b1715560a3380cfe5e8" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "65ab4c77f8d88808327b115184c44056" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.30.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0a969f022b81189c9675ca624838bb7b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.30.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "1f1118702f5f96cb08d2eadef0cd73a5" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.31.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f6391b80aa186aca2df8d816b3d5d20b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cc354c852575d11406c100e4ecb6a360" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.31.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2f75fd301b667261b7b5cd83e306c61f" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.31.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "e0774dd3e2453472fe59ba3797255c73" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "transformer.embd.weight", + "shape": [ + 32064, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "2794efed4f90da2679835636c49453c1" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.0.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "aca7213bce912376770af1f9a7805546" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7d45b5a1d117d0fb2f97c2566b85c080" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.0.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b29c48242ab28d7751664fd917cb53d9" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.0.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "43ffb7ce1c9bb16f42f8cd92533105ac" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.1.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fba6c01b38085811a0b169a689e9a8b0" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "754739f276a532078fb8adf8a358a84f" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.1.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b08e5972a3ee4dc0a5d2446c74b78667" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.1.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "6a326d18c9a02a0ff844de3209d9d239" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.10.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5559680d21364578cee8b4f3b884c387" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b5a2560d04db862c4b0b44d480b3de11" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.10.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d7b4d5eb54d90c8b88cfcdcb7ab7a6d3" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.10.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "a2f33fa311c565fc0ad4b0b2ac9c24cf" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.11.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2ea00513628cede454214266723b73fe" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6f031b6a01f57cb949b87939fe3a6fcf" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.11.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7629386137cde4883fe6092961628908" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.11.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "74f94e083aa4730d2e928a652bb93aff" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.12.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fe1dcf39db8e8c03468b5714d7574f1d" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c55494e7965fcddc67886f2719fa4f28" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.12.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "01823150ed94ebda1ac03ff514674d59" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.12.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "f7606df9f1f8a7ae529f3c3d84812cfe" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.13.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5efd0889d320559ec7d2587d88560310" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fde3986982d33d33d6f9033954c4f76b" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.13.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2d6c2ac2c20271403136de2b690e2c10" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.13.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "f3e6c1bc7e706a4ca274baf2bb296a3e" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.14.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a6206e415bbb24b99496af3833bde153" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ce3c652a54a9d59b42dfd107c214fedb" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.14.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "08432e0cda4f3d4efb5af8226d292545" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.14.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "4da8e23a88baea7d245115482c783b88" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.15.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "61fe1b44bf673ca96c6f5febd6dacaaa" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d03015371c3aab576d7c554299a64a19" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.15.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ba6b9594289c02e4bd8405905e3b9345" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.15.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "30b9eab0df51695103e0d2c8281853d2" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.16.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a87a68606ce19491b283ebfcb4f261e1" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3bb222858ea27693a823cc6e86b89394" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.16.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aafaa8e5e6341a8b186315f30b33eeaf" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.16.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "c0ec806ad19c5f93174e13e77343e9d1" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.17.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fa41b02a47344ce12ce2462ba41cddae" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8a5537e9896e145836ebf99dbb1d617b" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.17.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "56aff7c581b6f3aa14835e0cf733a113" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.17.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "bcb98fa15aca763650cfb90aad95ff8a" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.18.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3d3b925b996f918f06902cb77831e82b" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "006e594d0ec96f0a2b3824eeabc4405b" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.18.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "73d5f7100db12528051f7f19aede3d60" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.18.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "9f9e0dad8db22348008c009241057bba" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.19.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "64cba4f187a6c45a05dc06853132d7c4" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a10d189b10ace93718ab187f3f78b402" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.19.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "6a8801a5037374d1317431d8ed723218" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.19.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "329a57ff77416e0507f6fe9e662681e7" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.2.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "40578ab77b8ea1571d74b9a25d46d821" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e88194e0aed044702686ba4edd832d5d" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.2.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "baca8426aae056bc7a3ed32f5f7fa3b1" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.2.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "43077c2fdc9511c9bc7750d89ad53321" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.20.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "16c500aee75a3f6fadfb79388040ce59" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "35e566f071950f559d9a68ed651cdc0e" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.20.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bc4c11294dcef27da24c0d0c41cddb81" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.20.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "6c09843d6224c0c491c11052703711d4" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.21.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5301ec982f0f0bee7cbfd66c5f10db1e" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.3.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ad69d774f1ef8d178b241ed77788034e" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "71b49e1b1e72c73d95a73a80ff85cf56" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.3.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0a14557ee27469ea0c91accf21f1882a" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.3.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "1f612196539cf259ff0ba018a1d3fdd8" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.4.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "125b30cce73232acdd57017b751b8ec0" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "174b431f56124195a88de449325221d5" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.4.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f47846816d3009156669ef65dd815b9c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.4.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "fc5eab9b081abe8d88f8713d7340a33e" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.5.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "57feb42a7dad4058566e19deebdacfdf" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "122ada6175d0fb97ac9bd9c1c28f4a83" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.5.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4c27a252315d601d17df5b45c4ed394b" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.5.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "73184c595beddb967a0a1fb4adb8a697" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.6.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e4e3eed244b4ffe0542a0e6e950d5c23" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4f529e42dabff29fb4432f509073c9e0" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.6.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "48c4aca8ffbb252d17c5f5effb09a532" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.6.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "8a0f2cc6cd07a2208027ced9f52c3ad0" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.7.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9657e1f37ef7a3a950ff28e1b1c8d288" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6c071534349e4c2b500aa75b5f207769" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.7.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8e49ccfc80aa0ffe5526847f8c955f83" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.7.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "db54d2b3c23b16af51bc756ecf127307" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.8.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f9ab4d31e88e01d5d03b3cc9de36a71c" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "13f0af026f446e192ec681cae51413d4" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.8.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "084906da91b4132050bf217661c40b19" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.8.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "e5c8f3b8d7bc024599ea7c12e86e12b6" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.9.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "eaad0fe95f377541e343e2b65c526166" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f32f91ad037860d6cd65911faac9277c" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.9.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "442e739c1bd8c0ad4a7528a629d4f556" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 56623104, + "records": [ + { + "name": "transformer.h.9.mixer.qkv_proj.weight", + "shape": [ + 9216, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 56623104, + "byteOffset": 0 + } + ], + "md5sum": "cba65d66e4a5aa25c15a78e798ac7d5f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 19273728, + "records": [ + { + "name": "transformer.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6144 + }, + { + "name": "transformer.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12288 + }, + { + "name": "transformer.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18432 + }, + { + "name": "transformer.h.22.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 24576 + }, + { + "name": "transformer.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18898944 + }, + { + "name": "transformer.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18905088 + }, + { + "name": "transformer.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18911232 + }, + { + "name": "transformer.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18917376 + }, + { + "name": "transformer.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18923520 + }, + { + "name": "transformer.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18929664 + }, + { + "name": "transformer.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18935808 + }, + { + "name": "transformer.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18941952 + }, + { + "name": "transformer.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18948096 + }, + { + "name": "transformer.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18954240 + }, + { + "name": "transformer.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18960384 + }, + { + "name": "transformer.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18966528 + }, + { + "name": "transformer.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18972672 + }, + { + "name": "transformer.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18978816 + }, + { + "name": "transformer.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18984960 + }, + { + "name": "transformer.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18991104 + }, + { + "name": "transformer.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18997248 + }, + { + "name": "transformer.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19003392 + }, + { + "name": "transformer.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19009536 + }, + { + "name": "transformer.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19015680 + }, + { + "name": "transformer.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19027968 + }, + { + "name": "transformer.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19040256 + }, + { + "name": "transformer.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19046400 + }, + { + "name": "transformer.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19052544 + }, + { + "name": "transformer.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19058688 + }, + { + "name": "transformer.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19064832 + }, + { + "name": "transformer.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19070976 + }, + { + "name": "transformer.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19077120 + }, + { + "name": "transformer.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19083264 + }, + { + "name": "transformer.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19089408 + }, + { + "name": "transformer.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19095552 + }, + { + "name": "transformer.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19101696 + }, + { + "name": "transformer.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19107840 + }, + { + "name": "transformer.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19113984 + }, + { + "name": "transformer.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19120128 + }, + { + "name": "transformer.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19126272 + }, + { + "name": "transformer.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19132416 + }, + { + "name": "transformer.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19138560 + }, + { + "name": "transformer.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19144704 + }, + { + "name": "transformer.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19150848 + }, + { + "name": "transformer.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19156992 + }, + { + "name": "transformer.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19163136 + }, + { + "name": "transformer.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19169280 + }, + { + "name": "transformer.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19175424 + }, + { + "name": "transformer.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19181568 + }, + { + "name": "transformer.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19187712 + }, + { + "name": "transformer.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19193856 + }, + { + "name": "transformer.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19200000 + }, + { + "name": "transformer.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19206144 + }, + { + "name": "transformer.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19212288 + }, + { + "name": "transformer.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19218432 + }, + { + "name": "transformer.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19224576 + }, + { + "name": "transformer.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19230720 + }, + { + "name": "transformer.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19236864 + }, + { + "name": "transformer.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19243008 + }, + { + "name": "transformer.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19249152 + }, + { + "name": "transformer.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19255296 + }, + { + "name": "transformer.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19261440 + }, + { + "name": "transformer.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19267584 + } + ], + "md5sum": "f066d7c15dbf7a42f65846ff50a0ae06" + } + ] +} \ No newline at end of file