|
{ |
|
"metadata": { |
|
"ParamSize": 195, |
|
"ParamBytes": 15284318208.0, |
|
"BitsPerParam": 32.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 197001216, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
32064, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 197001216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50bfa5ca69130329b361691385220fd3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5320ace7ac0b0a390bf4c9f053311d97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74012f122c1ec2f103701543533ce0bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aff4cc0bd6d00be3914e2d6dad9dff63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b46be99ba308714b55358ba510eda548" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2edaf5997449cc3bb1c7472e774a3efc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b87ea378fd79fbe0e9d83174bde981c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c0359df1eb3629ab1c5d17501e45bcd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "677315cc563f6247211c9e8a0db85e04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d34fb180b24ebc888075e803ab53954c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe2be387a8972d355503a4f155abe611" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d0757d3ec5beeddf5f79f0f6b3e6e8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "afc0bf61cea22bd8b80f6d0ea396af73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4fe49456c619414d2d1a2134c268a8d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "55551bb35b49eb6003949724c62901d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "abb163e57f58bbc0fddd4b5a6f21e865" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f58c1e80baa1a7b700950b324fbd2a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e6ad3c958cd64e21edf03a84e226825" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e3ab2c5b4f3f258e2d2086ca3fcfc84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "398de83f6a8e8e82e956eed55a831575" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "478e5f04f2db7686aaef4ccd7d649eee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9955b6c8108950edab5ce695f6afbedb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05c790d28e0c5f7a1a785b5ad0fc8dff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51b705aa2332aaf12c98bb31e35c6b64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b03e7c7b7da806ae2f20cbbbe32c9274" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18750375127185ea31611d0751c8dff1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9012ddc20239409a8b6da5c4edcd39c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f2a620361744e1e67f6eadeeb637881" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ee2bbb2fdd8d3b860d161c00aeaa1f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2993a57be85929c5ce3a4cc4febeca6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b86f0fb31845416bbcb570be11e82633" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "474a0fd92e2496373a6b66007123fab2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82eb442aab225ad664bf0b9dadf43424" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91fb0e6d8fe7f89dbe222d23e249ee1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35e0419e9361572a80c3bf180404a3ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b8fd0d85ed6401072d48b986cf0b494" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d54d6ab6d5549015663590edba61e93d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed0885b0153a9d61c2f95a66a3519ad4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d97e0909d4d271b2fb35d11f4b89491f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f44c97ac001e3ba60f6a7fa4124ec6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b67aca569ec29f32924b44439ef3deea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e92e1f619b10b65da46e63e539a76e00" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d490ceb12f02af0806af999340806b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 197001216, |
|
"records": [ |
|
{ |
|
"name": "transformer.embd.weight", |
|
"shape": [ |
|
32064, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 197001216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98e31b824de2a90359588c4c1d133f38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa23a45742b4a8734d04a848bc5dce2d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d36872f7d13a793c02c3d520239edc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "990a52b9a6afe3baf5737517b933aa32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef11dab97e3dbd2a8ec04353499fc996" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f2b3866d1f2d8acdc9e602b53f88c29" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4634b49d520d3233a9e625567fbf6ec5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a37018cea5505730876e890f602108c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66b739dd9ffd916561719aa89c29d352" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31cfc680eb65c2c4342e60945a4d3025" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "24eca67eb7f9ec34a8072b989336546d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d07fe57bc52a8cf565da6e42698b30b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0cd4b084ab232fd263a5c010110cbc78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5b43a9be2c1b610d4b1885f0b4918ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "515e27349424d6f503a0b4710a50a1c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "243c402b5c0e75896df090e8edf3bc16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "975afaa8102855ffb2e2dbca6f4f737d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ae2b3b90abb1c04c26c866756ce4b16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "680dc86ce9a6d0f81b7b3668693d0a37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3de6128fcfde6de1be2056b62fc10344" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3daaa6005290febec0043a3d93c086bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b873859cefe8a13f0379b283557ef631" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7aeb9426021eb90dfca1bcced0e5c9b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3872cfa2dc38057be8c3317387670c9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8281724f0bce8f2b8faa2ecb9127b7fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d23d5370cedee8fa9d547b5b72391a13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f273c821d5fa9246d5964fabed05f006" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b17bf24d2ecaa9e3fd9a557b42eaa01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30a7b4a98b66bd8c94769f751c096a6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87b612d712cea4ce8021382d1ff7371f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d2e23310591d6b2f39f18182a4f2f80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be1c90137d1c70552235b65ba2690e35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b0f3b239127d033b04ba33ac9f4c6ecc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5c4e69e05ec692e5b29f70eea229690" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b37bab30c0bd90e9f69a34222f4dc90" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "986c9913a81568478f9f1158706bcb05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22001ffae6525704d046be2a2b370ffa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "62b461aa00eb4cb45cc9bd0f8422d879" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be1fca9a165f9e3e4a195fd01abb1b89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77e53d73a7f31af495e796eef2cc7fc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1310b2773753ccb3fdcc326955d00894" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b47050e725a85feca863934905ec6add" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a5e54b79fb7914c426f9bf93404c356" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86ce4f8bfc3fd918ba54eeeec853b755" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a79dd6ba13b00aa8a62323f7394cbf6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8b0c94a263df59bbc6c451faf86edf3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e9854cb07af553876a929898a62f8f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df7545d510e105e65893ba029c2acc95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31c64523eec03d65e8935ac25fb4b256" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "936d1b6c6b485dfbb8e26e9a147f6f09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab3fa7d6f5c2100649e9ea77ec90652a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15b69e210ff10e2d680ca4eebdc65ca8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c44ae5bf314fe42e3ba989df4db3e128" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ffd7adb486adbadf50234f3f67c8ee3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0dd7f68796d3cea391674247e0030d8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d464950be8ca73b61d9f58d49e036688" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d69c5c9d6ea882631a156f1a0336ea2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e75ad58976ffc0d9b09abff9f29dfb4d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a980f3c7bf607ae02408c83a99c601f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e3645774c3fc993d3a4821b17ec21f1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96b41a784ef9216a66902703dc11c27c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32c326b4dc35732b88f9df3319843dba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5eec508ad5fba85101fc205ed762066d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3774b33ab24be517fdf08dc0d752b8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69f04230c67472b3b20ba250287bd515" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "243c217fdf81a62aca7387e1ec73bce2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b71643f474daf908e564754dbaa7deb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3efe4e1e9e7f64e67a3f5abffc0c2b98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07654e58a6b17389eb8567eaafe4c5bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28b3c07bfb5f22bb0765ca3c65936e4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bef25f868e3890bcac557b41b80a1ff6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "40394a4ea9c7e3cc3c20d385c01481be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b9d3c24ca1654a37a6c905ab49f8435" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe79e82ab3500a19fd4716de2cca6f89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "573b89dfd22d5a1c70625fbeeb263522" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d8b154cfa184e289afcb517ac83bb90" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e9e5710732010ad217d2d636ee74fa0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a26bc4abe9754c6cb4cac4ae3d7b74c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3099c50d55019dade286d036c88826c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f30341c546eb5e481fb9aad9753478b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35b2661d21ceac0a5c85841962ef95a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbaa07656668462057dfccece9543863" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba8303d69ba52aac7f004f18236ae801" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3124c456f54783777e507c3f0303e096" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b95d9f65ea5d06c34a5615a2aa32b951" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b4f85c5d6bf10526c3c160f1b2db4a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 19273728, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 6144 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 12288 |
|
}, |
|
{ |
|
"name": "transformer.h.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18432 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 18874368, |
|
"byteOffset": 24576 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18905088 |
|
}, |
|
{ |
|
"name": "transformer.h.24.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18911232 |
|
}, |
|
{ |
|
"name": "transformer.h.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18917376 |
|
}, |
|
{ |
|
"name": "transformer.h.25.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18923520 |
|
}, |
|
{ |
|
"name": "transformer.h.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18929664 |
|
}, |
|
{ |
|
"name": "transformer.h.26.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18935808 |
|
}, |
|
{ |
|
"name": "transformer.h.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18941952 |
|
}, |
|
{ |
|
"name": "transformer.h.27.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18948096 |
|
}, |
|
{ |
|
"name": "transformer.h.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18954240 |
|
}, |
|
{ |
|
"name": "transformer.h.28.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18960384 |
|
}, |
|
{ |
|
"name": "transformer.h.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18966528 |
|
}, |
|
{ |
|
"name": "transformer.h.29.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18972672 |
|
}, |
|
{ |
|
"name": "transformer.h.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18978816 |
|
}, |
|
{ |
|
"name": "transformer.h.30.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18984960 |
|
}, |
|
{ |
|
"name": "transformer.h.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18991104 |
|
}, |
|
{ |
|
"name": "transformer.h.31.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 18997248 |
|
}, |
|
{ |
|
"name": "transformer.h.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19003392 |
|
}, |
|
{ |
|
"name": "transformer.norm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19009536 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19015680 |
|
}, |
|
{ |
|
"name": "transformer.h.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19027968 |
|
}, |
|
{ |
|
"name": "transformer.h.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19040256 |
|
}, |
|
{ |
|
"name": "transformer.h.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19046400 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19052544 |
|
}, |
|
{ |
|
"name": "transformer.h.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19058688 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19064832 |
|
}, |
|
{ |
|
"name": "transformer.h.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19070976 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "transformer.h.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19083264 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19089408 |
|
}, |
|
{ |
|
"name": "transformer.h.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19095552 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19101696 |
|
}, |
|
{ |
|
"name": "transformer.h.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19107840 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19113984 |
|
}, |
|
{ |
|
"name": "transformer.h.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19120128 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19126272 |
|
}, |
|
{ |
|
"name": "transformer.h.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19132416 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19138560 |
|
}, |
|
{ |
|
"name": "transformer.h.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19144704 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19150848 |
|
}, |
|
{ |
|
"name": "transformer.h.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19156992 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19163136 |
|
}, |
|
{ |
|
"name": "transformer.h.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19169280 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19175424 |
|
}, |
|
{ |
|
"name": "transformer.h.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19181568 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19187712 |
|
}, |
|
{ |
|
"name": "transformer.h.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19193856 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19200000 |
|
}, |
|
{ |
|
"name": "transformer.h.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19206144 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19212288 |
|
}, |
|
{ |
|
"name": "transformer.h.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19218432 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19224576 |
|
}, |
|
{ |
|
"name": "transformer.h.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19230720 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19236864 |
|
}, |
|
{ |
|
"name": "transformer.h.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19243008 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19249152 |
|
}, |
|
{ |
|
"name": "transformer.h.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19255296 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19261440 |
|
}, |
|
{ |
|
"name": "transformer.h.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6144, |
|
"byteOffset": 19267584 |
|
} |
|
], |
|
"md5sum": "d0cea00b9c73d5b217df8458917b4aa3" |
|
} |
|
] |
|
} |