Upload model
Browse files- config.json +50 -0
- pytorch_model-00001-of-00023.bin +3 -0
- pytorch_model-00002-of-00023.bin +3 -0
- pytorch_model-00003-of-00023.bin +3 -0
- pytorch_model-00004-of-00023.bin +3 -0
- pytorch_model-00005-of-00023.bin +3 -0
- pytorch_model-00006-of-00023.bin +3 -0
- pytorch_model-00007-of-00023.bin +3 -0
- pytorch_model-00008-of-00023.bin +3 -0
- pytorch_model-00009-of-00023.bin +3 -0
- pytorch_model-00010-of-00023.bin +3 -0
- pytorch_model-00011-of-00023.bin +3 -0
- pytorch_model-00012-of-00023.bin +3 -0
- pytorch_model-00013-of-00023.bin +3 -0
- pytorch_model-00014-of-00023.bin +3 -0
- pytorch_model-00015-of-00023.bin +3 -0
- pytorch_model-00016-of-00023.bin +3 -0
- pytorch_model-00017-of-00023.bin +3 -0
- pytorch_model-00018-of-00023.bin +3 -0
- pytorch_model-00019-of-00023.bin +3 -0
- pytorch_model-00020-of-00023.bin +3 -0
- pytorch_model-00021-of-00023.bin +3 -0
- pytorch_model-00022-of-00023.bin +3 -0
- pytorch_model-00023-of-00023.bin +3 -0
- pytorch_model.bin.index.json +0 -0
config.json
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home/arthur_huggingface_co/fairseq/weights/checkpoints/hf-converted-moe-54b",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"activation_function": "relu",
|
5 |
+
"architectures": [
|
6 |
+
"NllbMoeModel"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"batch_prioritized_routing": false,
|
10 |
+
"bos_token_id": 0,
|
11 |
+
"d_model": 2048,
|
12 |
+
"decoder_attention_heads": 16,
|
13 |
+
"decoder_ffn_dim": 8192,
|
14 |
+
"decoder_layerdrop": 0,
|
15 |
+
"decoder_layers": 24,
|
16 |
+
"decoder_sparse_step": 4,
|
17 |
+
"decoder_start_token_id": 2,
|
18 |
+
"dropout": 0.1,
|
19 |
+
"encoder_attention_heads": 16,
|
20 |
+
"encoder_ffn_dim": 8192,
|
21 |
+
"encoder_layerdrop": 0,
|
22 |
+
"encoder_layers": 24,
|
23 |
+
"encoder_sparse_step": 4,
|
24 |
+
"eos_token_id": 2,
|
25 |
+
"expert_capacity": 64,
|
26 |
+
"init_std": 0.02,
|
27 |
+
"is_encoder_decoder": true,
|
28 |
+
"max_length": 200,
|
29 |
+
"max_position_embeddings": 1024,
|
30 |
+
"model_type": "nllb_moe",
|
31 |
+
"moe_eval_capacity_token_fraction": 1.0,
|
32 |
+
"moe_token_dropout": 0.2,
|
33 |
+
"normalize_router_prob_before_dropping": false,
|
34 |
+
"num_experts": 128,
|
35 |
+
"num_hidden_layers": 24,
|
36 |
+
"pad_token_id": 1,
|
37 |
+
"router_aux_loss_coef": 0.001,
|
38 |
+
"router_bias": false,
|
39 |
+
"router_dtype": "float32",
|
40 |
+
"router_ignore_padding_tokens": false,
|
41 |
+
"router_jitter_noise": 0.01,
|
42 |
+
"router_type": "tokens_masked",
|
43 |
+
"router_z_loss_coef": 0.001,
|
44 |
+
"scale_embedding": true,
|
45 |
+
"second_expert_policy": "all",
|
46 |
+
"torch_dtype": "float32",
|
47 |
+
"transformers_version": "4.27.0.dev0",
|
48 |
+
"use_cache": true,
|
49 |
+
"vocab_size": 256206
|
50 |
+
}
|
pytorch_model-00001-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9a01081279f5270e50b66114b359399d178d3f70c807068a42e728bc9b09ac6
|
3 |
+
size 7881650579
|
pytorch_model-00002-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c69aedd9d34c776f077979cf65d6d7c982a4598151646667bd45901532ff0cc8
|
3 |
+
size 9935240893
|
pytorch_model-00003-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e87b61b1b17440b137bc5e47105fd13cb3a7f21ee2dea186e492f707b9e8b794
|
3 |
+
size 9936482623
|
pytorch_model-00004-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c7f5b7b360f8bed14ebf5c3a7c0e01b568af5059b28e4ec306386272fe8eedf9
|
3 |
+
size 9935240957
|
pytorch_model-00005-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c904df100b9928282769b9360733783bbeabf5600a122016ca6c4791ae7652ff
|
3 |
+
size 9936482987
|
pytorch_model-00006-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1958a842daacff1ce3e1da3bc05a6ec28f06741a14190b1df76c4ac71116bf98
|
3 |
+
size 9936483287
|
pytorch_model-00007-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4352bba732c871dc5356dbb4dcdc9de6cd81a1c4b7b3563913b56ebc648d12e0
|
3 |
+
size 9935241149
|
pytorch_model-00008-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57959c787bd138a2c69b337d577ca3a28cb377a2aa86d8bf1f2d8ecfe14a04f7
|
3 |
+
size 9936482743
|
pytorch_model-00009-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c2b96fb7b580fc3933e3abe60946a5ff17fc970faf16f2b38373664159bf911
|
3 |
+
size 9935241149
|
pytorch_model-00010-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f31ddd24c833696b5600cc8b135dd62a2bcca0090dbc28062391e697aa604548
|
3 |
+
size 9936482743
|
pytorch_model-00011-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e463aa0ea26d4d957a99c0e7e831eb663ae59746360a2f122a10989fe448153
|
3 |
+
size 9935241213
|
pytorch_model-00012-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eaab08a291e020ce948fbb5bd2b509eda3c2bbf865306ce5708dbf5b1afc1445
|
3 |
+
size 9962851615
|
pytorch_model-00013-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6620b4464ca0fd9b59f9bd43679ddea18992a9da01eaf13b9a6fed2b203f90ef
|
3 |
+
size 9935240893
|
pytorch_model-00014-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8889365f817ff4da86c77c199af36c3bf64695e1228bb5043f644717cb55bc72
|
3 |
+
size 9936607643
|
pytorch_model-00015-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fef1ccdbf6c00eb37cd989d4aca529d3fbff16d30f6dec8e93773fd633ba886
|
3 |
+
size 9935240957
|
pytorch_model-00016-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fc0245e4bd3e43ebd01d0c6d79b4faa7b88834cdcf2cbb09ebeb430bde2c8bf
|
3 |
+
size 9936608295
|
pytorch_model-00017-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d33268527cfc42cfeb888820f8c75c0d48b76dee487b30ab5364a4d95974e786
|
3 |
+
size 9936608727
|
pytorch_model-00018-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b1f46e82e2fbb655dc468fc37b0b1fbf4673e76295f9cf33c48dad828545aca
|
3 |
+
size 9935241149
|
pytorch_model-00019-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c7a510a5910470e794fd30e3a91282971f5fbc91bf607ea816c4589e8666932
|
3 |
+
size 9936608567
|
pytorch_model-00020-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f7a1ac1eaf762ba49cad65572bdc671597334df3ae5e9d553313dd6b80b083d
|
3 |
+
size 9935241149
|
pytorch_model-00021-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afd3ce3b9671096c70c59cdace5eefd70b782ec42f951c44de9bf21d835197d7
|
3 |
+
size 9936607787
|
pytorch_model-00022-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eff18afcbd5879cd650d82ee5d2bd391654aaf6d7e8444792312ead42d5557eb
|
3 |
+
size 9935241149
|
pytorch_model-00023-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:808cebcbf4b6ae4cee04ab20f669a3e2cf4e95faafd3c5116385d961d372f596
|
3 |
+
size 3557910629
|
pytorch_model.bin.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|