{
"amp": 1,
"architectures": [
"FlaubertWithLMHeadModel"
],
"asm": false,
"attention_dropout": 0.1,
"bos_index": 0,
"bos_token_id": 0,
"bptt": 512,
"causal": false,
"clip_grad_norm": 5,
"dropout": 0.1,
"emb_dim": 1024,
"embed_init_std": 0.02209708691207961,
"encoder_only": true,
"end_n_top": 5,
"eos_index": 1,
"fp16": true,
"gelu_activation": true,
"group_by_size": true,
"id2lang": {
"0": "fr"
},
"init_std": 0.02,
"is_encoder": true,
"lang2id": {
"fr": 0
},
"lang_id": 0,
"langs": [
"fr"
],
"layer_norm_eps": 1e-06,
"layerdrop": 0.2,
"lg_sampling_factor": -1,
"lgs": "fr",
"mask_index": 5,
"mask_token_id": 0,
"max_batch_size": 0,
"max_position_embeddings": 512,
"max_vocab": -1,
"mlm_steps": [
[
"fr",
null
]
],
"model_type": "flaubert",
"n_heads": 16,
"n_langs": 1,
"n_layers": 24,
"pad_index": 2,
"pad_token_id": 2,
"pre_norm": true,
"sample_alpha": 0,
"share_inout_emb": true,
"sinusoidal_embeddings": false,
"start_n_top": 5,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "first",
"summary_use_proj": true,
"tokens_per_batch": -1,
"unk_index": 3,
"use_apex": true,
"use_lang_emb": true,
"vocab_size": 68729,
"word_blank": 0,
"word_dropout": 0,
"word_keep": 0.1,
"word_mask": 0.8,
"word_mask_keep_rand": "0.8,0.1,0.1",
"word_pred": 0.15,
"word_rand": 0.1,
"word_shuffle": 0
}