diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1440a446c713a3da2ca01996e3b430afabfb95ed --- /dev/null +++ b/config.json @@ -0,0 +1,32 @@ +{ + "_name_or_path": "huge", + "_remove_final_layer_norm": false, + "activation_dropout": 0.0, + "activation_function": "gelu", + "architectures": [ + "OPTForCausalLM" + ], + "attention_dropout": 0.1, + "bias": false, + "bos_token_id": 0, + "do_layer_norm_before": true, + "dropout": 0.1, + "eos_token_id": 2, + "ffn_dim": 40960, + "hidden_size": 10240, + "init_std": 0.02, + "layer_norm_elementwise_affine": false, + "layerdrop": 0.0, + "learned_embeddings": true, + "max_position_embeddings": 2048, + "model_type": "opt", + "num_attention_heads": 80, + "num_hidden_layers": 96, + "pad_token_id": 1, + "scale_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.24.0", + "use_cache": true, + "vocab_size": 50000, + "word_embed_proj_dim": 10240 +} diff --git a/pytorch_model-00001-of-00053.bin b/pytorch_model-00001-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae08edb6d38e9f326c3c64b87f6a1a1ec8e58172 --- /dev/null +++ b/pytorch_model-00001-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59b3a886e5aa10edbb30abf036990ef58825e045bb04e8b58209b6b7d3bbf55 +size 8843724059 diff --git a/pytorch_model-00002-of-00053.bin b/pytorch_model-00002-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..6196ebe58d3c8127293c0f30d59ba7c74db41c36 --- /dev/null +++ b/pytorch_model-00002-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1e4d079d4798fe416ef239cd783429dcf025cdafd2433264a59336e8564b3b +size 9647850097 diff --git a/pytorch_model-00003-of-00053.bin b/pytorch_model-00003-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..10e09da8f5048795bc981e947287fe9d7029dc1f --- /dev/null +++ b/pytorch_model-00003-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27411f3684c214b3bed915bb50ff6db5e5e664d7fab89153d3565cee12a98d06 +size 9647932551 diff --git a/pytorch_model-00004-of-00053.bin b/pytorch_model-00004-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..acbf94ede032add9e7c9e83539f3a8880e347e70 --- /dev/null +++ b/pytorch_model-00004-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fdc5096fe4d093955fe9d63b31d7ad7de4677bb65d73219a96f4c279fad339d +size 9647932527 diff --git a/pytorch_model-00005-of-00053.bin b/pytorch_model-00005-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc221645e0953c717856bbddef94588137cd182 --- /dev/null +++ b/pytorch_model-00005-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2014f932d96d19987fcfaa5c480ad754373e8c8dd0ca0e0232eb53e0433295 +size 9647932527 diff --git a/pytorch_model-00006-of-00053.bin b/pytorch_model-00006-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e5c3d5abc7bffd9205de729cab3973104bad0f8 --- /dev/null +++ b/pytorch_model-00006-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e92b24a150be4f09c4863553f55d42752cc3d0ce7d481851754b6462a3922e9 +size 8389558961 diff --git a/pytorch_model-00007-of-00053.bin b/pytorch_model-00007-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..b23ed7da62eaae8d99a62e8c0b1b4e6eb32cb048 --- /dev/null +++ b/pytorch_model-00007-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc2c149b20f1aaa2741fa7d55da6fc0d81e936e24d27eea9a46885d474768b0 +size 8389518683 diff --git a/pytorch_model-00008-of-00053.bin b/pytorch_model-00008-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6be8030ead611a3583372308a6c6ca14ae3e417 --- /dev/null +++ b/pytorch_model-00008-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2985b25da8b2d5864e86da483910a445bbb4a7c426888c3f09e4cbbd076d75a2 +size 9647850161 diff --git a/pytorch_model-00009-of-00053.bin b/pytorch_model-00009-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..f6c307239dc94b6982e3b2d11c365789208d7974 --- /dev/null +++ b/pytorch_model-00009-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a7eddf45ec1c4c287c9628857fd1eed0b78adbd305ee2c1491e78dbe99e297 +size 9647932551 diff --git a/pytorch_model-00010-of-00053.bin b/pytorch_model-00010-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fb9f77301a6cde871d0324e58613a24f42e4121 --- /dev/null +++ b/pytorch_model-00010-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b17642319c68abfa6842133c420126090e4cc4475eac0bca3cd61cd0009519b +size 9647932527 diff --git a/pytorch_model-00011-of-00053.bin b/pytorch_model-00011-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..5646d3995a6e3f5cd4720fb5bf73358ffc22c0c2 --- /dev/null +++ b/pytorch_model-00011-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b2af6cb662d28403e110c39fe625319f176f870758a0d60454a7c77455d45e +size 9647932527 diff --git a/pytorch_model-00012-of-00053.bin b/pytorch_model-00012-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab0c19a77312fb98c7d75ba6419fa0978686f8ba --- /dev/null +++ b/pytorch_model-00012-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f1e6c1dacfa33e94cb3434243913a2139c16b8462a1fa4be79e3c326de3858 +size 8389558961 diff --git a/pytorch_model-00013-of-00053.bin b/pytorch_model-00013-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..f869301d64de267967bc7efcf32a44996745ce78 --- /dev/null +++ b/pytorch_model-00013-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b427688b8876bfb01199d321bac1d214a839f6f229e84e9ac930e215616a8ec7 +size 8389518683 diff --git a/pytorch_model-00014-of-00053.bin b/pytorch_model-00014-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..781c37dc8886073273a94389a59574778cbae821 --- /dev/null +++ b/pytorch_model-00014-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:439d9faaec3ebde80e9e16d6b57e7aeee702b79b65f0dd0bf3b7476ff4bd01c7 +size 9647850161 diff --git a/pytorch_model-00015-of-00053.bin b/pytorch_model-00015-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4e56d508e5b7adb20949b1c3b2e2b4560fd7e0f --- /dev/null +++ b/pytorch_model-00015-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0088e7c2f90924a001120d96abc152da76593f04d8ad2cb8402cd265611d7249 +size 9647932551 diff --git a/pytorch_model-00016-of-00053.bin b/pytorch_model-00016-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..886b7b66d41d28ea68984609549f3f852a3c6368 --- /dev/null +++ b/pytorch_model-00016-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:332baebd649bfdf03da54d98486b3ecaa61a2aec679eccb42ca4f276a66c2468 +size 9647932527 diff --git a/pytorch_model-00017-of-00053.bin b/pytorch_model-00017-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..4041e41cf5aebb7e24cc251de5eaa91712dbbda6 --- /dev/null +++ b/pytorch_model-00017-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d71304be21ce2878edec937bfe93b12808c16befcb68eba647d3291ba92446 +size 9647932527 diff --git a/pytorch_model-00018-of-00053.bin b/pytorch_model-00018-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fc378806d2347ddc5abd9ed9892cde029926517 --- /dev/null +++ b/pytorch_model-00018-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac68442c8ac028e33c4d32253f9dbecf73cbe92ad1c51c006d08a277ba2fa3c +size 8389558961 diff --git a/pytorch_model-00019-of-00053.bin b/pytorch_model-00019-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..441352678800b181ed76613b6a2802bff6b132fb --- /dev/null +++ b/pytorch_model-00019-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4947319c1c2a826ae2dd4092d47ad29ac76f2d532b1f79721c785b27ce7e51e +size 8389518683 diff --git a/pytorch_model-00020-of-00053.bin b/pytorch_model-00020-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..290729b6da0dc765290902fcc756c92b2b02c834 --- /dev/null +++ b/pytorch_model-00020-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e31d1a7b8489c34a73367ca7ca8ecd3857c8c9c411d7fdfbe2f54e6b6e449d1f +size 9647850161 diff --git a/pytorch_model-00021-of-00053.bin b/pytorch_model-00021-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3db08eb2385ca057dfe38dd7d43a80089a9297c --- /dev/null +++ b/pytorch_model-00021-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1e03919c55300cfafcba4faf2ba71b94347f20c739b93845c5159eda973c3b +size 9647932551 diff --git a/pytorch_model-00022-of-00053.bin b/pytorch_model-00022-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..967b15039fc27407a9ffc4f97b7a2795c7d9b424 --- /dev/null +++ b/pytorch_model-00022-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e8394911844569d6921e897e9aeb40576a97e74fcc19a09f982b6eb0cfdd93 +size 9647932527 diff --git a/pytorch_model-00023-of-00053.bin b/pytorch_model-00023-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..cd7b626d5179856440f8902f3a947af7cb0d2bc4 --- /dev/null +++ b/pytorch_model-00023-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc605b13a972a6697ddd942b09b8d140fa38a4604ce4d5329227e22d1dddbec5 +size 9647932527 diff --git a/pytorch_model-00024-of-00053.bin b/pytorch_model-00024-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..93cd4759323f25383750abea833d69e8e062a5b3 --- /dev/null +++ b/pytorch_model-00024-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb0c1ff775ecc798d18863419ca4aa06821f14847f1e596b149390f17c45868 +size 8389558961 diff --git a/pytorch_model-00025-of-00053.bin b/pytorch_model-00025-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..fb1361081394f30bb923860d94675947d58debb6 --- /dev/null +++ b/pytorch_model-00025-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c22ff38dd4bd849bd77205fa40c942cce251b6fa48eeec7dd562a22cd34095 +size 8389518683 diff --git a/pytorch_model-00026-of-00053.bin b/pytorch_model-00026-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..87c97498c83e1efc1f84927da63d13a5e85de5ae --- /dev/null +++ b/pytorch_model-00026-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:188d6daa7773d5a55c3b26b99513d1820f4125f308c82e187801afc429459a64 +size 9647850161 diff --git a/pytorch_model-00027-of-00053.bin b/pytorch_model-00027-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..ed01716833f41aad8761ad2731866d689f0fccba --- /dev/null +++ b/pytorch_model-00027-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de38d2ef277337a1aad49674577c2f3309b2acb9de6f4a1c0436ac1846db1729 +size 9647932551 diff --git a/pytorch_model-00028-of-00053.bin b/pytorch_model-00028-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..a39e009bd57cf5d7ef33943da8d1532e54aa0bea --- /dev/null +++ b/pytorch_model-00028-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed57a657194b6f763c8b07eb319d7a01764198a138feddd5ed0e93703191bdd0 +size 9647932527 diff --git a/pytorch_model-00029-of-00053.bin b/pytorch_model-00029-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..220f470f243d9f752ab244120cffd1aefb3b76d5 --- /dev/null +++ b/pytorch_model-00029-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1192a4ad8f99abd991854a6bdac96b59426df325be6e597c4abd9262efa39e5 +size 9647932527 diff --git a/pytorch_model-00030-of-00053.bin b/pytorch_model-00030-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fa067514feff9cc64693326a1042557098817a8 --- /dev/null +++ b/pytorch_model-00030-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8e96f0fb352fe4ce57e78041d6eefc26b5c05b393326393e479a136b2192e0 +size 8389558961 diff --git a/pytorch_model-00031-of-00053.bin b/pytorch_model-00031-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..2afc24d26073a48ef1855b7857ada698606a049e --- /dev/null +++ b/pytorch_model-00031-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd3da23a723f093eae8d277bfdfa62cdc05a8fd8cfbaf402182105f239e3933f +size 8389518683 diff --git a/pytorch_model-00032-of-00053.bin b/pytorch_model-00032-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..02f5d97931a3e31df57ceae4d71c7b1ec35dfcd7 --- /dev/null +++ b/pytorch_model-00032-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c32df5861d52b9aa5614c59641c90103b79aec6e5c951f6c20637ba441fd8dd +size 9647850161 diff --git a/pytorch_model-00033-of-00053.bin b/pytorch_model-00033-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..26f032e1d9b99a9582f778b694aeb7260814ff31 --- /dev/null +++ b/pytorch_model-00033-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f696d41065eb596da0d45224fefa65ee8d654442a46725248f1a6f098efd39f3 +size 9647932551 diff --git a/pytorch_model-00034-of-00053.bin b/pytorch_model-00034-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0998601dd092eebfc0577452520c4331b864f3f --- /dev/null +++ b/pytorch_model-00034-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ac49ae47d578c79dc30f4a9f6634e7deae50258cb40f8df10c91432a1be384 +size 9647932527 diff --git a/pytorch_model-00035-of-00053.bin b/pytorch_model-00035-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce7ebcfaacd5b97c56039804a2567d63b2a595ad --- /dev/null +++ b/pytorch_model-00035-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57668df277444ea9512a3a459395918fc8f665ccfd876c5ebec444d5ce73049d +size 9647932527 diff --git a/pytorch_model-00036-of-00053.bin b/pytorch_model-00036-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..3cd4b34d57ccf7bbfc412204798802876fe845e4 --- /dev/null +++ b/pytorch_model-00036-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e73d8d258aba60ebe1a21bf761ed4323bd12190748a89d1586f2efd35bb05171 +size 8389558961 diff --git a/pytorch_model-00037-of-00053.bin b/pytorch_model-00037-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..febfa8a7791bef2f0c5503199c795c3b7e8d3dcb --- /dev/null +++ b/pytorch_model-00037-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138ccd7e66807c942a6c3b19ed0afca424350d149bd4c21f34f066756c33cb42 +size 8389518683 diff --git a/pytorch_model-00038-of-00053.bin b/pytorch_model-00038-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..143f5618e23526b3a444e1eb8cc493b11896dbd4 --- /dev/null +++ b/pytorch_model-00038-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8901f5e0d834ed11effa0544692757cbdce8a8448d8d8d07ca9165af223e8f9 +size 9647850161 diff --git a/pytorch_model-00039-of-00053.bin b/pytorch_model-00039-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..62c81ed794c5404959e1e464c6160cd5317c4854 --- /dev/null +++ b/pytorch_model-00039-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ad17ed87f80c6ef2dc37b42b3eb315c97a7a95c8c97f9e2624e7ffb7dc4fc94 +size 9647932551 diff --git a/pytorch_model-00040-of-00053.bin b/pytorch_model-00040-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..c7d572bec203f4df7359717b515a23fb7ddb5629 --- /dev/null +++ b/pytorch_model-00040-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d337e9c0e55050c7f20800077ecb4e33664a66c2f5c1d393b3f624aa84349e +size 9647932527 diff --git a/pytorch_model-00041-of-00053.bin b/pytorch_model-00041-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..27e7d07c8dffe352812b66ac020fd86686074278 --- /dev/null +++ b/pytorch_model-00041-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b91b32f5472d0fe3750884151db6c29e92922b11d1c8dffe82181ddabd111a1b +size 9647932527 diff --git a/pytorch_model-00042-of-00053.bin b/pytorch_model-00042-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..2cfdfd02372629b8d152168896404098307927d1 --- /dev/null +++ b/pytorch_model-00042-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b89d52d795648a42e8d4b205079d37698c1ade319f032d3773f858163f429a +size 8389558961 diff --git a/pytorch_model-00043-of-00053.bin b/pytorch_model-00043-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f07e13f182f5e0c6091dbaad17e0cdcd6ffc027 --- /dev/null +++ b/pytorch_model-00043-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a6126fde8e1ff72db4ac230ccddc74d76ff1923b188b6843ef2e166dd86b3a6 +size 8389518683 diff --git a/pytorch_model-00044-of-00053.bin b/pytorch_model-00044-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..f24026b0c3c234a1ce9603c0f96475b66262886d --- /dev/null +++ b/pytorch_model-00044-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6fe592118641a4655e276dd97898365365690a3d70c0484b6f3579055069a1 +size 9647850161 diff --git a/pytorch_model-00045-of-00053.bin b/pytorch_model-00045-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..7751a2c1ee783dfcf21cf3bf9776ba3382ab5568 --- /dev/null +++ b/pytorch_model-00045-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1187a24d64f28964976bd2ba5707ab017a6de42539d133ff393164286c1ae872 +size 9647932551 diff --git a/pytorch_model-00046-of-00053.bin b/pytorch_model-00046-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..f699231d057772661ca7eb20622adfee8501236a --- /dev/null +++ b/pytorch_model-00046-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42dda95f62e3a8ca9d9b273c6115239e209dc6dcc1e3c6f1d8ca813b3c55c962 +size 9647932527 diff --git a/pytorch_model-00047-of-00053.bin b/pytorch_model-00047-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4184b6dc6fc2cb657bb1cb6356611a7dd1771a9 --- /dev/null +++ b/pytorch_model-00047-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8709f68d2f0f5e86ab9b4c1125da6c9645cdc99d1790cf9829bee100f9bbaf15 +size 9647932527 diff --git a/pytorch_model-00048-of-00053.bin b/pytorch_model-00048-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..5eb17f48ad8a1f34eb4b825506dde286aff1f43d --- /dev/null +++ b/pytorch_model-00048-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b08cb4b5da8cf2ac29418b8818ffc447921a2ed012c03e6e2e2296c91fadeb97 +size 8389558961 diff --git a/pytorch_model-00049-of-00053.bin b/pytorch_model-00049-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e269094c33622170ad793db54f9a4f3debc3aff --- /dev/null +++ b/pytorch_model-00049-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05d80333d36eb4971ef0e8ed39fcc78389a628d4434c9e200560113e4c5df2a +size 8389518683 diff --git a/pytorch_model-00050-of-00053.bin b/pytorch_model-00050-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..2fa50dc43ec9c689203145144242a7c9ef96df9d --- /dev/null +++ b/pytorch_model-00050-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5e10275ea2c0f87758a159da7cd678953161593215cab977f404663efa7230 +size 9647850161 diff --git a/pytorch_model-00051-of-00053.bin b/pytorch_model-00051-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ed5150d2c8fa54c6793a012a27f66d8b3da4aee --- /dev/null +++ b/pytorch_model-00051-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99489b3fa09114679a0519905f2c884152e109035da8dd39413726b50b01b2a2 +size 9647932551 diff --git a/pytorch_model-00052-of-00053.bin b/pytorch_model-00052-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..5123aeb313c47419788298c265d6fc7d08cb8dc9 --- /dev/null +++ b/pytorch_model-00052-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b74b6aade1539089e63755ba55593db84ffb38b6fbf70357873664616a3c58 +size 9647932527 diff --git a/pytorch_model-00053-of-00053.bin b/pytorch_model-00053-of-00053.bin new file mode 100644 index 0000000000000000000000000000000000000000..599411fffc454f7ef215d49e3e056eec56d34e16 --- /dev/null +++ b/pytorch_model-00053-of-00053.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151a21fb0546c317a304531ee0de6e7d4bc736ae10dc7334291c74b58086ef12 +size 6662230752 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..75a4714fb8683ea9b90647a480aa5d94b9af3b8a --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,1548 @@ +{ + "metadata": { + "total_size": 487414988800 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.embed_positions.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.embed_tokens.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.final_layer_norm.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.final_layer_norm.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.fc1.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.fc1.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.fc2.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.fc2.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.final_layer_norm.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.final_layer_norm.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.k_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.out_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.out_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.q_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.v_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn_layer_norm.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.0.self_attn_layer_norm.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.fc1.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.1.fc1.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.1.fc2.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.1.fc2.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.1.final_layer_norm.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.1.final_layer_norm.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.1.self_attn.k_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.out_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.out_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.q_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.v_proj.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn_layer_norm.bias": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.1.self_attn_layer_norm.weight": "pytorch_model-00001-of-00053.bin", + "model.decoder.layers.10.fc1.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.fc1.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.fc2.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.10.fc2.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.10.final_layer_norm.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.10.final_layer_norm.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.10.self_attn.k_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.k_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.out_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.out_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.q_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.q_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.v_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn.v_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn_layer_norm.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.10.self_attn_layer_norm.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.11.fc1.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.fc1.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.fc2.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.fc2.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.final_layer_norm.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.final_layer_norm.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.k_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.k_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.out_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.out_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.q_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.q_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.v_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn.v_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn_layer_norm.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.11.self_attn_layer_norm.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.fc1.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.12.fc1.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.12.fc2.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.12.fc2.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.12.final_layer_norm.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.12.final_layer_norm.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.12.self_attn.k_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.k_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.out_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.out_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.q_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.q_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.v_proj.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn.v_proj.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn_layer_norm.bias": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.12.self_attn_layer_norm.weight": "pytorch_model-00007-of-00053.bin", + "model.decoder.layers.13.fc1.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.fc1.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.fc2.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.fc2.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.final_layer_norm.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.final_layer_norm.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.k_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.k_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.out_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.out_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.q_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.q_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.v_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn.v_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn_layer_norm.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.13.self_attn_layer_norm.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.fc1.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.fc1.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.fc2.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.fc2.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.final_layer_norm.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.final_layer_norm.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.self_attn.k_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.self_attn.k_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.self_attn.out_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.self_attn.out_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.self_attn.q_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.self_attn.q_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.self_attn.v_proj.bias": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.self_attn.v_proj.weight": "pytorch_model-00008-of-00053.bin", + "model.decoder.layers.14.self_attn_layer_norm.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.14.self_attn_layer_norm.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.fc1.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.fc1.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.fc2.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.fc2.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.final_layer_norm.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.final_layer_norm.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.k_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.k_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.out_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.out_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.q_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.q_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.v_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn.v_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn_layer_norm.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.15.self_attn_layer_norm.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.16.fc1.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.fc1.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.fc2.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.fc2.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.final_layer_norm.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.final_layer_norm.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.self_attn.k_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.16.self_attn.k_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.16.self_attn.out_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.self_attn.out_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.self_attn.q_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.self_attn.q_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.self_attn.v_proj.bias": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.16.self_attn.v_proj.weight": "pytorch_model-00009-of-00053.bin", + "model.decoder.layers.16.self_attn_layer_norm.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.16.self_attn_layer_norm.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.fc1.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.fc1.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.fc2.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.fc2.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.final_layer_norm.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.final_layer_norm.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.k_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.k_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.out_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.out_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.q_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.q_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.v_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn.v_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn_layer_norm.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.17.self_attn_layer_norm.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.18.fc1.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.fc1.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.fc2.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.fc2.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.final_layer_norm.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.final_layer_norm.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn.k_proj.bias": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.18.self_attn.k_proj.weight": "pytorch_model-00010-of-00053.bin", + "model.decoder.layers.18.self_attn.out_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn.out_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn.q_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn.q_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn.v_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn.v_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn_layer_norm.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.18.self_attn_layer_norm.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.fc1.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.fc1.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.fc2.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.fc2.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.final_layer_norm.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.final_layer_norm.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.k_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.k_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.out_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.out_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.q_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.q_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.v_proj.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn.v_proj.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn_layer_norm.bias": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.19.self_attn_layer_norm.weight": "pytorch_model-00011-of-00053.bin", + "model.decoder.layers.2.fc1.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.fc1.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.fc2.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.fc2.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.final_layer_norm.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.final_layer_norm.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.k_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.k_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.out_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.out_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.q_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.q_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.v_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn.v_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn_layer_norm.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.2.self_attn_layer_norm.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.20.fc1.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.fc1.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.fc2.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.fc2.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.final_layer_norm.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.final_layer_norm.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.k_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.out_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.out_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.q_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.q_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.v_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn_layer_norm.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.20.self_attn_layer_norm.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.fc1.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.fc1.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.fc2.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.21.fc2.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.21.final_layer_norm.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.21.final_layer_norm.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.21.self_attn.k_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.out_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.out_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.q_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.v_proj.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn_layer_norm.bias": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.21.self_attn_layer_norm.weight": "pytorch_model-00012-of-00053.bin", + "model.decoder.layers.22.fc1.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.fc1.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.fc2.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.fc2.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.final_layer_norm.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.final_layer_norm.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.k_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.out_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.out_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.q_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.v_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn_layer_norm.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.22.self_attn_layer_norm.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.fc1.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.23.fc1.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.23.fc2.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.23.fc2.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.23.final_layer_norm.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.23.final_layer_norm.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.23.self_attn.k_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.out_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.out_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.q_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.v_proj.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn_layer_norm.bias": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.23.self_attn_layer_norm.weight": "pytorch_model-00013-of-00053.bin", + "model.decoder.layers.24.fc1.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.fc1.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.fc2.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.fc2.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.final_layer_norm.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.final_layer_norm.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.k_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.k_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.out_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.out_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.q_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.q_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.v_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn.v_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn_layer_norm.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.24.self_attn_layer_norm.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.fc1.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.fc1.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.fc2.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.fc2.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.final_layer_norm.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.final_layer_norm.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.self_attn.k_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.self_attn.k_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.self_attn.out_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.self_attn.out_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.self_attn.q_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.self_attn.q_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.self_attn.v_proj.bias": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.self_attn.v_proj.weight": "pytorch_model-00014-of-00053.bin", + "model.decoder.layers.25.self_attn_layer_norm.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.25.self_attn_layer_norm.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.fc1.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.fc1.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.fc2.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.fc2.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.final_layer_norm.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.final_layer_norm.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.k_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.k_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.out_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.out_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.q_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.q_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.v_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn.v_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn_layer_norm.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.26.self_attn_layer_norm.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.27.fc1.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.fc1.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.fc2.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.fc2.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.final_layer_norm.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.final_layer_norm.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.self_attn.k_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.27.self_attn.k_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.27.self_attn.out_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.self_attn.out_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.self_attn.q_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.self_attn.q_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.self_attn.v_proj.bias": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.27.self_attn.v_proj.weight": "pytorch_model-00015-of-00053.bin", + "model.decoder.layers.27.self_attn_layer_norm.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.27.self_attn_layer_norm.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.fc1.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.fc1.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.fc2.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.fc2.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.final_layer_norm.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.final_layer_norm.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.k_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.k_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.out_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.out_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.q_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.q_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.v_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn.v_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn_layer_norm.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.28.self_attn_layer_norm.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.29.fc1.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.fc1.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.fc2.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.fc2.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.final_layer_norm.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.final_layer_norm.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn.k_proj.bias": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.29.self_attn.k_proj.weight": "pytorch_model-00016-of-00053.bin", + "model.decoder.layers.29.self_attn.out_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn.out_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn.q_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn.q_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn.v_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn.v_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn_layer_norm.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.29.self_attn_layer_norm.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.3.fc1.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.fc1.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.fc2.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.fc2.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.final_layer_norm.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.final_layer_norm.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.self_attn.k_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.3.self_attn.out_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.self_attn.out_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.self_attn.q_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.3.self_attn.v_proj.bias": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00053.bin", + "model.decoder.layers.3.self_attn_layer_norm.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.3.self_attn_layer_norm.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.30.fc1.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.fc1.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.fc2.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.fc2.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.final_layer_norm.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.final_layer_norm.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.k_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.k_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.out_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.out_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.q_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.q_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.v_proj.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn.v_proj.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn_layer_norm.bias": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.30.self_attn_layer_norm.weight": "pytorch_model-00017-of-00053.bin", + "model.decoder.layers.31.fc1.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.fc1.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.fc2.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.fc2.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.final_layer_norm.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.final_layer_norm.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.k_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.k_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.out_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.out_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.q_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.q_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.v_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn.v_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn_layer_norm.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.31.self_attn_layer_norm.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.fc1.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.fc1.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.fc2.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.32.fc2.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.32.final_layer_norm.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.32.final_layer_norm.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.32.self_attn.k_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.k_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.out_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.out_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.q_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.q_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.v_proj.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn.v_proj.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn_layer_norm.bias": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.32.self_attn_layer_norm.weight": "pytorch_model-00018-of-00053.bin", + "model.decoder.layers.33.fc1.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.fc1.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.fc2.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.fc2.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.final_layer_norm.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.final_layer_norm.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.k_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.k_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.out_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.out_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.q_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.q_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.v_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn.v_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn_layer_norm.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.33.self_attn_layer_norm.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.fc1.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.34.fc1.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.34.fc2.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.34.fc2.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.34.final_layer_norm.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.34.final_layer_norm.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.34.self_attn.k_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.k_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.out_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.out_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.q_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.q_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.v_proj.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn.v_proj.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn_layer_norm.bias": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.34.self_attn_layer_norm.weight": "pytorch_model-00019-of-00053.bin", + "model.decoder.layers.35.fc1.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.fc1.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.fc2.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.fc2.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.final_layer_norm.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.final_layer_norm.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.k_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.k_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.out_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.out_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.q_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.q_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.v_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn.v_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn_layer_norm.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.35.self_attn_layer_norm.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.fc1.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.fc1.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.fc2.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.fc2.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.final_layer_norm.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.final_layer_norm.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.self_attn.k_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.self_attn.k_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.self_attn.out_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.self_attn.out_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.self_attn.q_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.self_attn.q_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.self_attn.v_proj.bias": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.self_attn.v_proj.weight": "pytorch_model-00020-of-00053.bin", + "model.decoder.layers.36.self_attn_layer_norm.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.36.self_attn_layer_norm.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.fc1.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.fc1.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.fc2.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.fc2.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.final_layer_norm.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.final_layer_norm.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.k_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.k_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.out_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.out_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.q_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.q_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.v_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn.v_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn_layer_norm.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.37.self_attn_layer_norm.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.38.fc1.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.fc1.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.fc2.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.fc2.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.final_layer_norm.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.final_layer_norm.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.self_attn.k_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.38.self_attn.k_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.38.self_attn.out_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.self_attn.out_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.self_attn.q_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.self_attn.q_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.self_attn.v_proj.bias": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.38.self_attn.v_proj.weight": "pytorch_model-00021-of-00053.bin", + "model.decoder.layers.38.self_attn_layer_norm.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.38.self_attn_layer_norm.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.fc1.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.fc1.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.fc2.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.fc2.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.final_layer_norm.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.final_layer_norm.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.k_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.k_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.out_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.out_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.q_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.q_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.v_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn.v_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn_layer_norm.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.39.self_attn_layer_norm.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.4.fc1.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.fc1.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.fc2.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.fc2.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.final_layer_norm.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.final_layer_norm.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.k_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.k_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.out_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.out_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.q_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.q_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.v_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn.v_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn_layer_norm.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.4.self_attn_layer_norm.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.40.fc1.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.fc1.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.fc2.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.fc2.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.final_layer_norm.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.final_layer_norm.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn.k_proj.bias": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.40.self_attn.k_proj.weight": "pytorch_model-00022-of-00053.bin", + "model.decoder.layers.40.self_attn.out_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn.out_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn.q_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn.q_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn.v_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn.v_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn_layer_norm.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.40.self_attn_layer_norm.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.fc1.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.fc1.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.fc2.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.fc2.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.final_layer_norm.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.final_layer_norm.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.k_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.k_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.out_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.out_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.q_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.q_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.v_proj.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn.v_proj.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn_layer_norm.bias": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.41.self_attn_layer_norm.weight": "pytorch_model-00023-of-00053.bin", + "model.decoder.layers.42.fc1.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.fc1.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.fc2.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.fc2.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.final_layer_norm.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.final_layer_norm.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.k_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.k_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.out_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.out_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.q_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.q_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.v_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn.v_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn_layer_norm.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.42.self_attn_layer_norm.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.fc1.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.fc1.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.fc2.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.43.fc2.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.43.final_layer_norm.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.43.final_layer_norm.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.43.self_attn.k_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.k_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.out_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.out_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.q_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.q_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.v_proj.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn.v_proj.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn_layer_norm.bias": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.43.self_attn_layer_norm.weight": "pytorch_model-00024-of-00053.bin", + "model.decoder.layers.44.fc1.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.fc1.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.fc2.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.fc2.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.final_layer_norm.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.final_layer_norm.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.k_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.k_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.out_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.out_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.q_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.q_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.v_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn.v_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn_layer_norm.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.44.self_attn_layer_norm.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.fc1.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.45.fc1.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.45.fc2.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.45.fc2.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.45.final_layer_norm.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.45.final_layer_norm.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.45.self_attn.k_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.k_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.out_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.out_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.q_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.q_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.v_proj.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn.v_proj.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn_layer_norm.bias": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.45.self_attn_layer_norm.weight": "pytorch_model-00025-of-00053.bin", + "model.decoder.layers.46.fc1.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.fc1.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.fc2.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.fc2.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.final_layer_norm.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.final_layer_norm.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.k_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.k_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.out_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.out_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.q_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.q_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.v_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn.v_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn_layer_norm.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.46.self_attn_layer_norm.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.fc1.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.fc1.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.fc2.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.fc2.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.final_layer_norm.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.final_layer_norm.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.self_attn.k_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.self_attn.k_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.self_attn.out_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.self_attn.out_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.self_attn.q_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.self_attn.q_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.self_attn.v_proj.bias": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.self_attn.v_proj.weight": "pytorch_model-00026-of-00053.bin", + "model.decoder.layers.47.self_attn_layer_norm.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.47.self_attn_layer_norm.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.fc1.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.fc1.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.fc2.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.fc2.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.final_layer_norm.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.final_layer_norm.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.k_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.k_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.out_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.out_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.q_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.q_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.v_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn.v_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn_layer_norm.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.48.self_attn_layer_norm.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.49.fc1.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.fc1.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.fc2.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.fc2.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.final_layer_norm.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.final_layer_norm.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.self_attn.k_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.49.self_attn.k_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.49.self_attn.out_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.self_attn.out_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.self_attn.q_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.self_attn.q_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.self_attn.v_proj.bias": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.49.self_attn.v_proj.weight": "pytorch_model-00027-of-00053.bin", + "model.decoder.layers.49.self_attn_layer_norm.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.49.self_attn_layer_norm.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.5.fc1.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.fc1.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.fc2.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.fc2.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.final_layer_norm.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.final_layer_norm.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.self_attn.k_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.5.self_attn.k_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.5.self_attn.out_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.self_attn.out_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.self_attn.q_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.self_attn.q_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.self_attn.v_proj.bias": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.5.self_attn.v_proj.weight": "pytorch_model-00003-of-00053.bin", + "model.decoder.layers.5.self_attn_layer_norm.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.5.self_attn_layer_norm.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.50.fc1.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.fc1.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.fc2.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.fc2.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.final_layer_norm.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.final_layer_norm.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.k_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.k_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.out_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.out_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.q_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.q_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.v_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn.v_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn_layer_norm.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.50.self_attn_layer_norm.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.51.fc1.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.fc1.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.fc2.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.fc2.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.final_layer_norm.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.final_layer_norm.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn.k_proj.bias": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.51.self_attn.k_proj.weight": "pytorch_model-00028-of-00053.bin", + "model.decoder.layers.51.self_attn.out_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn.out_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn.q_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn.q_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn.v_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn.v_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn_layer_norm.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.51.self_attn_layer_norm.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.fc1.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.fc1.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.fc2.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.fc2.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.final_layer_norm.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.final_layer_norm.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.k_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.k_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.out_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.out_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.q_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.q_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.v_proj.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn.v_proj.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn_layer_norm.bias": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.52.self_attn_layer_norm.weight": "pytorch_model-00029-of-00053.bin", + "model.decoder.layers.53.fc1.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.fc1.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.fc2.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.fc2.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.final_layer_norm.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.final_layer_norm.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.k_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.k_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.out_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.out_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.q_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.q_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.v_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn.v_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn_layer_norm.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.53.self_attn_layer_norm.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.fc1.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.fc1.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.fc2.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.54.fc2.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.54.final_layer_norm.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.54.final_layer_norm.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.54.self_attn.k_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.k_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.out_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.out_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.q_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.q_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.v_proj.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn.v_proj.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn_layer_norm.bias": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.54.self_attn_layer_norm.weight": "pytorch_model-00030-of-00053.bin", + "model.decoder.layers.55.fc1.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.fc1.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.fc2.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.fc2.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.final_layer_norm.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.final_layer_norm.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.k_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.k_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.out_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.out_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.q_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.q_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.v_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn.v_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn_layer_norm.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.55.self_attn_layer_norm.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.fc1.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.56.fc1.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.56.fc2.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.56.fc2.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.56.final_layer_norm.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.56.final_layer_norm.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.56.self_attn.k_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.k_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.out_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.out_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.q_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.q_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.v_proj.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn.v_proj.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn_layer_norm.bias": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.56.self_attn_layer_norm.weight": "pytorch_model-00031-of-00053.bin", + "model.decoder.layers.57.fc1.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.fc1.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.fc2.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.fc2.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.final_layer_norm.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.final_layer_norm.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.k_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.k_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.out_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.out_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.q_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.q_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.v_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn.v_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn_layer_norm.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.57.self_attn_layer_norm.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.fc1.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.fc1.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.fc2.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.fc2.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.final_layer_norm.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.final_layer_norm.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.self_attn.k_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.self_attn.k_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.self_attn.out_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.self_attn.out_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.self_attn.q_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.self_attn.q_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.self_attn.v_proj.bias": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.self_attn.v_proj.weight": "pytorch_model-00032-of-00053.bin", + "model.decoder.layers.58.self_attn_layer_norm.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.58.self_attn_layer_norm.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.fc1.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.fc1.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.fc2.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.fc2.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.final_layer_norm.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.final_layer_norm.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.k_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.k_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.out_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.out_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.q_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.q_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.v_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn.v_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn_layer_norm.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.59.self_attn_layer_norm.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.6.fc1.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.fc1.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.fc2.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.fc2.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.final_layer_norm.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.final_layer_norm.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.k_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.k_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.out_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.out_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.q_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.q_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.v_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn.v_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn_layer_norm.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.6.self_attn_layer_norm.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.60.fc1.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.fc1.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.fc2.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.fc2.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.final_layer_norm.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.final_layer_norm.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.self_attn.k_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.60.self_attn.k_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.60.self_attn.out_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.self_attn.out_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.self_attn.q_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.self_attn.q_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.self_attn.v_proj.bias": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.60.self_attn.v_proj.weight": "pytorch_model-00033-of-00053.bin", + "model.decoder.layers.60.self_attn_layer_norm.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.60.self_attn_layer_norm.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.fc1.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.fc1.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.fc2.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.fc2.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.final_layer_norm.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.final_layer_norm.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.k_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.k_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.out_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.out_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.q_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.q_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.v_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn.v_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn_layer_norm.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.61.self_attn_layer_norm.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.62.fc1.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.fc1.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.fc2.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.fc2.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.final_layer_norm.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.final_layer_norm.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn.k_proj.bias": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.62.self_attn.k_proj.weight": "pytorch_model-00034-of-00053.bin", + "model.decoder.layers.62.self_attn.out_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn.out_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn.q_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn.q_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn.v_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn.v_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn_layer_norm.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.62.self_attn_layer_norm.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.fc1.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.fc1.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.fc2.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.fc2.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.final_layer_norm.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.final_layer_norm.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.k_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.k_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.out_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.out_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.q_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.q_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.v_proj.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn.v_proj.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn_layer_norm.bias": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.63.self_attn_layer_norm.weight": "pytorch_model-00035-of-00053.bin", + "model.decoder.layers.64.fc1.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.fc1.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.fc2.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.fc2.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.final_layer_norm.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.final_layer_norm.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.k_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.k_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.out_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.out_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.q_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.q_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.v_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn.v_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn_layer_norm.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.64.self_attn_layer_norm.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.fc1.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.fc1.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.fc2.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.65.fc2.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.65.final_layer_norm.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.65.final_layer_norm.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.65.self_attn.k_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.k_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.out_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.out_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.q_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.q_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.v_proj.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn.v_proj.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn_layer_norm.bias": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.65.self_attn_layer_norm.weight": "pytorch_model-00036-of-00053.bin", + "model.decoder.layers.66.fc1.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.fc1.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.fc2.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.fc2.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.final_layer_norm.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.final_layer_norm.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.k_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.k_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.out_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.out_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.q_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.q_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.v_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn.v_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn_layer_norm.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.66.self_attn_layer_norm.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.fc1.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.67.fc1.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.67.fc2.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.67.fc2.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.67.final_layer_norm.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.67.final_layer_norm.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.67.self_attn.k_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.k_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.out_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.out_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.q_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.q_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.v_proj.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn.v_proj.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn_layer_norm.bias": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.67.self_attn_layer_norm.weight": "pytorch_model-00037-of-00053.bin", + "model.decoder.layers.68.fc1.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.fc1.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.fc2.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.fc2.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.final_layer_norm.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.final_layer_norm.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.k_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.k_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.out_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.out_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.q_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.q_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.v_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn.v_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn_layer_norm.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.68.self_attn_layer_norm.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.fc1.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.fc1.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.fc2.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.fc2.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.final_layer_norm.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.final_layer_norm.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.self_attn.k_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.self_attn.k_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.self_attn.out_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.self_attn.out_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.self_attn.q_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.self_attn.q_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.self_attn.v_proj.bias": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.self_attn.v_proj.weight": "pytorch_model-00038-of-00053.bin", + "model.decoder.layers.69.self_attn_layer_norm.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.69.self_attn_layer_norm.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.7.fc1.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.fc1.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.fc2.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.fc2.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.final_layer_norm.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.final_layer_norm.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn.k_proj.bias": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.7.self_attn.k_proj.weight": "pytorch_model-00004-of-00053.bin", + "model.decoder.layers.7.self_attn.out_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn.out_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn.q_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn.q_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn.v_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn.v_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn_layer_norm.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.7.self_attn_layer_norm.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.70.fc1.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.fc1.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.fc2.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.fc2.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.final_layer_norm.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.final_layer_norm.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.k_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.k_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.out_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.out_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.q_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.q_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.v_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn.v_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn_layer_norm.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.70.self_attn_layer_norm.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.71.fc1.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.fc1.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.fc2.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.fc2.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.final_layer_norm.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.final_layer_norm.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.self_attn.k_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.71.self_attn.k_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.71.self_attn.out_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.self_attn.out_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.self_attn.q_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.self_attn.q_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.self_attn.v_proj.bias": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.71.self_attn.v_proj.weight": "pytorch_model-00039-of-00053.bin", + "model.decoder.layers.71.self_attn_layer_norm.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.71.self_attn_layer_norm.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.fc1.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.fc1.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.fc2.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.fc2.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.final_layer_norm.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.final_layer_norm.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.k_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.k_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.out_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.out_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.q_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.q_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.v_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn.v_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn_layer_norm.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.72.self_attn_layer_norm.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.73.fc1.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.fc1.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.fc2.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.fc2.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.final_layer_norm.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.final_layer_norm.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn.k_proj.bias": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.73.self_attn.k_proj.weight": "pytorch_model-00040-of-00053.bin", + "model.decoder.layers.73.self_attn.out_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn.out_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn.q_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn.q_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn.v_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn.v_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn_layer_norm.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.73.self_attn_layer_norm.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.fc1.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.fc1.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.fc2.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.fc2.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.final_layer_norm.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.final_layer_norm.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.k_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.k_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.out_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.out_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.q_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.q_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.v_proj.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn.v_proj.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn_layer_norm.bias": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.74.self_attn_layer_norm.weight": "pytorch_model-00041-of-00053.bin", + "model.decoder.layers.75.fc1.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.fc1.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.fc2.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.fc2.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.final_layer_norm.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.final_layer_norm.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.k_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.k_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.out_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.out_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.q_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.q_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.v_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn.v_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn_layer_norm.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.75.self_attn_layer_norm.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.fc1.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.fc1.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.fc2.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.76.fc2.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.76.final_layer_norm.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.76.final_layer_norm.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.76.self_attn.k_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.k_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.out_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.out_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.q_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.q_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.v_proj.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn.v_proj.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn_layer_norm.bias": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.76.self_attn_layer_norm.weight": "pytorch_model-00042-of-00053.bin", + "model.decoder.layers.77.fc1.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.fc1.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.fc2.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.fc2.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.final_layer_norm.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.final_layer_norm.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.k_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.k_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.out_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.out_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.q_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.q_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.v_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn.v_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn_layer_norm.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.77.self_attn_layer_norm.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.fc1.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.78.fc1.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.78.fc2.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.78.fc2.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.78.final_layer_norm.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.78.final_layer_norm.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.78.self_attn.k_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.k_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.out_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.out_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.q_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.q_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.v_proj.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn.v_proj.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn_layer_norm.bias": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.78.self_attn_layer_norm.weight": "pytorch_model-00043-of-00053.bin", + "model.decoder.layers.79.fc1.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.fc1.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.fc2.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.fc2.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.final_layer_norm.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.final_layer_norm.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.k_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.k_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.out_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.out_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.q_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.q_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.v_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn.v_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn_layer_norm.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.79.self_attn_layer_norm.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.8.fc1.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.fc1.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.fc2.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.fc2.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.final_layer_norm.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.final_layer_norm.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.k_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.k_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.out_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.out_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.q_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.q_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.v_proj.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn.v_proj.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn_layer_norm.bias": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.8.self_attn_layer_norm.weight": "pytorch_model-00005-of-00053.bin", + "model.decoder.layers.80.fc1.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.fc1.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.fc2.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.fc2.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.final_layer_norm.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.final_layer_norm.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.self_attn.k_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.80.self_attn.k_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.80.self_attn.out_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.self_attn.out_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.self_attn.q_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.80.self_attn.q_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.80.self_attn.v_proj.bias": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.80.self_attn.v_proj.weight": "pytorch_model-00044-of-00053.bin", + "model.decoder.layers.80.self_attn_layer_norm.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.80.self_attn_layer_norm.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.fc1.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.fc1.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.fc2.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.fc2.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.final_layer_norm.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.final_layer_norm.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.k_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.k_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.out_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.out_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.q_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.q_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.v_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn.v_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn_layer_norm.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.81.self_attn_layer_norm.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.82.fc1.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.fc1.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.fc2.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.fc2.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.final_layer_norm.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.final_layer_norm.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.self_attn.k_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.82.self_attn.k_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.82.self_attn.out_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.self_attn.out_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.self_attn.q_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.self_attn.q_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.self_attn.v_proj.bias": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.82.self_attn.v_proj.weight": "pytorch_model-00045-of-00053.bin", + "model.decoder.layers.82.self_attn_layer_norm.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.82.self_attn_layer_norm.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.fc1.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.fc1.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.fc2.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.fc2.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.final_layer_norm.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.final_layer_norm.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.k_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.k_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.out_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.out_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.q_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.q_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.v_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn.v_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn_layer_norm.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.83.self_attn_layer_norm.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.84.fc1.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.fc1.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.fc2.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.fc2.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.final_layer_norm.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.final_layer_norm.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn.k_proj.bias": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.84.self_attn.k_proj.weight": "pytorch_model-00046-of-00053.bin", + "model.decoder.layers.84.self_attn.out_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn.out_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn.q_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn.q_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn.v_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn.v_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn_layer_norm.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.84.self_attn_layer_norm.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.fc1.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.fc1.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.fc2.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.fc2.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.final_layer_norm.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.final_layer_norm.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.k_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.k_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.out_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.out_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.q_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.q_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.v_proj.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn.v_proj.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn_layer_norm.bias": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.85.self_attn_layer_norm.weight": "pytorch_model-00047-of-00053.bin", + "model.decoder.layers.86.fc1.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.fc1.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.fc2.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.fc2.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.final_layer_norm.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.final_layer_norm.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.k_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.k_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.out_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.out_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.q_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.q_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.v_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn.v_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn_layer_norm.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.86.self_attn_layer_norm.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.fc1.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.fc1.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.fc2.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.87.fc2.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.87.final_layer_norm.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.87.final_layer_norm.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.87.self_attn.k_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.k_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.out_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.out_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.q_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.q_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.v_proj.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn.v_proj.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn_layer_norm.bias": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.87.self_attn_layer_norm.weight": "pytorch_model-00048-of-00053.bin", + "model.decoder.layers.88.fc1.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.fc1.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.fc2.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.fc2.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.final_layer_norm.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.final_layer_norm.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.k_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.k_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.out_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.out_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.q_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.q_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.v_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn.v_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn_layer_norm.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.88.self_attn_layer_norm.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.fc1.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.89.fc1.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.89.fc2.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.89.fc2.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.89.final_layer_norm.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.89.final_layer_norm.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.89.self_attn.k_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.k_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.out_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.out_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.q_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.q_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.v_proj.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn.v_proj.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn_layer_norm.bias": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.89.self_attn_layer_norm.weight": "pytorch_model-00049-of-00053.bin", + "model.decoder.layers.9.fc1.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.fc1.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.fc2.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.fc2.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.final_layer_norm.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.final_layer_norm.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.k_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.k_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.out_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.out_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.q_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.q_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.v_proj.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn.v_proj.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn_layer_norm.bias": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.9.self_attn_layer_norm.weight": "pytorch_model-00006-of-00053.bin", + "model.decoder.layers.90.fc1.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.fc1.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.fc2.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.fc2.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.final_layer_norm.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.final_layer_norm.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.k_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.k_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.out_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.out_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.q_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.q_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.v_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn.v_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn_layer_norm.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.90.self_attn_layer_norm.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.fc1.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.fc1.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.fc2.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.fc2.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.final_layer_norm.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.final_layer_norm.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.self_attn.k_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.self_attn.k_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.self_attn.out_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.self_attn.out_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.self_attn.q_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.self_attn.q_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.self_attn.v_proj.bias": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.self_attn.v_proj.weight": "pytorch_model-00050-of-00053.bin", + "model.decoder.layers.91.self_attn_layer_norm.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.91.self_attn_layer_norm.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.fc1.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.fc1.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.fc2.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.fc2.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.final_layer_norm.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.final_layer_norm.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.k_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.k_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.out_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.out_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.q_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.q_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.v_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn.v_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn_layer_norm.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.92.self_attn_layer_norm.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.93.fc1.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.fc1.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.fc2.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.fc2.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.final_layer_norm.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.final_layer_norm.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.self_attn.k_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.93.self_attn.k_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.93.self_attn.out_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.self_attn.out_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.self_attn.q_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.self_attn.q_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.self_attn.v_proj.bias": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.93.self_attn.v_proj.weight": "pytorch_model-00051-of-00053.bin", + "model.decoder.layers.93.self_attn_layer_norm.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.93.self_attn_layer_norm.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.fc1.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.fc1.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.fc2.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.fc2.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.final_layer_norm.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.final_layer_norm.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.k_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.k_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.out_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.out_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.q_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.q_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.v_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn.v_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn_layer_norm.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.94.self_attn_layer_norm.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.95.fc1.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.fc1.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.fc2.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.fc2.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.final_layer_norm.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.final_layer_norm.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn.k_proj.bias": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.95.self_attn.k_proj.weight": "pytorch_model-00052-of-00053.bin", + "model.decoder.layers.95.self_attn.out_proj.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn.out_proj.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn.q_proj.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn.q_proj.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn.v_proj.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn.v_proj.weight": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn_layer_norm.bias": "pytorch_model-00053-of-00053.bin", + "model.decoder.layers.95.self_attn_layer_norm.weight": "pytorch_model-00053-of-00053.bin" + } +}