Translation
Transformers
PyTorch
nllb-moe
feature-extraction
ArthurZ HF staff committed on
Commit
474e463
1 Parent(s): f1f2644

Upload model

Browse files
config.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/arthur_huggingface_co/fairseq/weights/checkpoints/hf-converted-moe-54b",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "NllbMoeModel"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "batch_prioritized_routing": false,
10
+ "bos_token_id": 0,
11
+ "d_model": 2048,
12
+ "decoder_attention_heads": 16,
13
+ "decoder_ffn_dim": 8192,
14
+ "decoder_layerdrop": 0,
15
+ "decoder_layers": 24,
16
+ "decoder_sparse_step": 4,
17
+ "decoder_start_token_id": 2,
18
+ "dropout": 0.1,
19
+ "encoder_attention_heads": 16,
20
+ "encoder_ffn_dim": 8192,
21
+ "encoder_layerdrop": 0,
22
+ "encoder_layers": 24,
23
+ "encoder_sparse_step": 4,
24
+ "eos_token_id": 2,
25
+ "expert_capacity": 64,
26
+ "init_std": 0.02,
27
+ "is_encoder_decoder": true,
28
+ "max_length": 200,
29
+ "max_position_embeddings": 1024,
30
+ "model_type": "nllb_moe",
31
+ "moe_eval_capacity_token_fraction": 1.0,
32
+ "moe_token_dropout": 0.2,
33
+ "normalize_router_prob_before_dropping": false,
34
+ "num_experts": 128,
35
+ "num_hidden_layers": 24,
36
+ "pad_token_id": 1,
37
+ "router_aux_loss_coef": 0.001,
38
+ "router_bias": false,
39
+ "router_dtype": "float32",
40
+ "router_ignore_padding_tokens": false,
41
+ "router_jitter_noise": 0.01,
42
+ "router_type": "tokens_masked",
43
+ "router_z_loss_coef": 0.001,
44
+ "scale_embedding": true,
45
+ "second_expert_policy": "all",
46
+ "torch_dtype": "float32",
47
+ "transformers_version": "4.27.0.dev0",
48
+ "use_cache": true,
49
+ "vocab_size": 256206
50
+ }
pytorch_model-00001-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9a01081279f5270e50b66114b359399d178d3f70c807068a42e728bc9b09ac6
3
+ size 7881650579
pytorch_model-00002-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c69aedd9d34c776f077979cf65d6d7c982a4598151646667bd45901532ff0cc8
3
+ size 9935240893
pytorch_model-00003-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e87b61b1b17440b137bc5e47105fd13cb3a7f21ee2dea186e492f707b9e8b794
3
+ size 9936482623
pytorch_model-00004-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7f5b7b360f8bed14ebf5c3a7c0e01b568af5059b28e4ec306386272fe8eedf9
3
+ size 9935240957
pytorch_model-00005-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c904df100b9928282769b9360733783bbeabf5600a122016ca6c4791ae7652ff
3
+ size 9936482987
pytorch_model-00006-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1958a842daacff1ce3e1da3bc05a6ec28f06741a14190b1df76c4ac71116bf98
3
+ size 9936483287
pytorch_model-00007-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4352bba732c871dc5356dbb4dcdc9de6cd81a1c4b7b3563913b56ebc648d12e0
3
+ size 9935241149
pytorch_model-00008-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57959c787bd138a2c69b337d577ca3a28cb377a2aa86d8bf1f2d8ecfe14a04f7
3
+ size 9936482743
pytorch_model-00009-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c2b96fb7b580fc3933e3abe60946a5ff17fc970faf16f2b38373664159bf911
3
+ size 9935241149
pytorch_model-00010-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f31ddd24c833696b5600cc8b135dd62a2bcca0090dbc28062391e697aa604548
3
+ size 9936482743
pytorch_model-00011-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e463aa0ea26d4d957a99c0e7e831eb663ae59746360a2f122a10989fe448153
3
+ size 9935241213
pytorch_model-00012-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaab08a291e020ce948fbb5bd2b509eda3c2bbf865306ce5708dbf5b1afc1445
3
+ size 9962851615
pytorch_model-00013-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6620b4464ca0fd9b59f9bd43679ddea18992a9da01eaf13b9a6fed2b203f90ef
3
+ size 9935240893
pytorch_model-00014-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8889365f817ff4da86c77c199af36c3bf64695e1228bb5043f644717cb55bc72
3
+ size 9936607643
pytorch_model-00015-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fef1ccdbf6c00eb37cd989d4aca529d3fbff16d30f6dec8e93773fd633ba886
3
+ size 9935240957
pytorch_model-00016-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc0245e4bd3e43ebd01d0c6d79b4faa7b88834cdcf2cbb09ebeb430bde2c8bf
3
+ size 9936608295
pytorch_model-00017-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33268527cfc42cfeb888820f8c75c0d48b76dee487b30ab5364a4d95974e786
3
+ size 9936608727
pytorch_model-00018-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b1f46e82e2fbb655dc468fc37b0b1fbf4673e76295f9cf33c48dad828545aca
3
+ size 9935241149
pytorch_model-00019-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c7a510a5910470e794fd30e3a91282971f5fbc91bf607ea816c4589e8666932
3
+ size 9936608567
pytorch_model-00020-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7a1ac1eaf762ba49cad65572bdc671597334df3ae5e9d553313dd6b80b083d
3
+ size 9935241149
pytorch_model-00021-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afd3ce3b9671096c70c59cdace5eefd70b782ec42f951c44de9bf21d835197d7
3
+ size 9936607787
pytorch_model-00022-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eff18afcbd5879cd650d82ee5d2bd391654aaf6d7e8444792312ead42d5557eb
3
+ size 9935241149
pytorch_model-00023-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:808cebcbf4b6ae4cee04ab20f669a3e2cf4e95faafd3c5116385d961d372f596
3
+ size 3557910629
pytorch_model.bin.index.json ADDED
The diff for this file is too large to render. See raw diff