AlekseyKorshuk committed on
Commit f59a1a3
1 Parent(s): 2c9e29c

Upload folder using huggingface_hub

backbone_configs/exp179.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.34.1",
+   "type_vocab_size": 0,
+   "vocab_size": 128001
+ }
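Across these exp*.json backbone configs only the dropout probabilities, the transformers version (4.34.1 vs. 4.36.2), and the vocab size (128001 vs. 128100) vary; everything else matches stock microsoft/deberta-v3-large. A minimal sketch of loading one of these files with transformers (the local path is just this repo's layout; instantiating from the config alone gives random weights, since the trained weights live in models/exp*_weights.pth):

```python
from transformers import DebertaV2Config, DebertaV2Model

# Load the architecture hyperparameters from one of the uploaded configs.
config = DebertaV2Config.from_json_file("backbone_configs/exp179.json")

# Randomly initialized backbone with the experiment's exact architecture.
backbone = DebertaV2Model(config)
print(config.hidden_size, config.num_hidden_layers)  # 1024 24
```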
backbone_configs/exp184.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.34.1",
+   "type_vocab_size": 0,
+   "vocab_size": 128001
+ }
backbone_configs/exp200.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.05,
+   "attention_probs_dropout_prob": 0.05,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.34.1",
+   "type_vocab_size": 0,
+   "vocab_size": 128001
+ }
backbone_configs/exp222.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.34.1",
+   "type_vocab_size": 0,
+   "vocab_size": 128001
+ }
backbone_configs/exp477.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp478.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp489.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp492.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp500.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.0,
+   "attention_probs_dropout_prob": 0.0,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.0,
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp510.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.0,
+   "attention_probs_dropout_prob": 0.0,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.0,
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp511.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
backbone_configs/exp512.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "microsoft/deberta-v3-large",
+   "attention_dropout": 0.1,
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout": 0.05,
+   "hidden_dropout_prob": 0.05,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.36.2",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
configs/exp179.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp179
+ best_model_path: /notebooks/models/exp179/models/fold_1_42_best.pth
+ checkpoint_path: /notebooks/models/exp179/chkp/fold_1_42_chkp.pth
+ config_path: /notebooks/models/exp179/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 256
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 48
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 48
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp179_seed42
+ external_dir: /notebooks/data/external
+ fold: 1
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp179/logs/fold-1.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: false
+   freeze_n_layers: 0
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: false
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: None
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp179
+ run_id: exp179_seed42_fold1
+ run_name: exp179_seed42_fold1
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 42
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp179/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
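Each configs/exp*.yaml is a flat, self-contained training config for one fold/seed run. The experiment runner itself is not part of this upload, so the exact loader is an assumption, but a minimal sketch of reading one of these files into a nested dict with PyYAML:

```python
import yaml

# Local path into this repo; any configs/exp*.yaml would work.
with open("configs/exp179.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model"]["backbone_type"])   # microsoft/deberta-v3-large
print(cfg["dataset"]["max_length"])    # 256
print(cfg["optimizer"]["encoder_lr"])  # 2e-05
```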
configs/exp184.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp184
+ best_model_path: /notebooks/models/exp184/models/fold_0_42_best.pth
+ checkpoint_path: /notebooks/models/exp184/chkp/fold_0_42_chkp.pth
+ config_path: /notebooks/models/exp184/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 256
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 48
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 48
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp184_seed42
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp184/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 20
+   use_wandb: true
+   valid_print_frequency: 20
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: false
+   freeze_n_layers: 0
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: false
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: None
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp184
+ run_id: exp184_seed42_fold0
+ run_name: exp184_seed42_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 42
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp184/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 4
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp200.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp200
+ best_model_path: /notebooks/models/exp200/models/fold_0_42_best.pth
+ checkpoint_path: /notebooks/models/exp200/chkp/fold_0_42_chkp.pth
+ config_path: /notebooks/models/exp200/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 512
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 12
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 12
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp200_seed42
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp200/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 20
+   use_wandb: true
+   valid_print_frequency: 20
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.05
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: false
+   freeze_n_layers: 0
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: false
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: None
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 1.5e-05
+   embeddings_lr: 1.5e-05
+   encoder_lr: 1.5e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp200
+ run_id: exp200_seed42_fold0
+ run_name: exp200_seed42_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 42
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp200/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 4
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp222.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp222
+ best_model_path: /notebooks/models/exp222/models/fold_0_2023_best.pth
+ checkpoint_path: /notebooks/models/exp222/chkp/fold_0_2023_chkp.pth
+ config_path: /notebooks/models/exp222/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 256
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 48
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 48
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp222_seed2023
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp222/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: false
+   freeze_n_layers: 0
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: false
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: None
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp222
+ run_id: exp222_seed2023_fold0
+ run_name: exp222_seed2023_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 2023
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp222/tokenizer
+ training:
+   apex: true
+   epochs: 4
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp477.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /home/models/exp477
+ best_model_path: /home/models/exp477/models/fold_0_10_best.pth
+ checkpoint_path: /home/models/exp477/chkp/fold_0_10_chkp.pth
+ config_path: /home/models/exp477/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /home/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 1024
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 6
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 6
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp477_seed10
+ external_dir: /home/data/external
+ fold: 0
+ interim_dir: /home/data/interim
+ log_path: /home/models/exp477/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: true
+   freeze_n_layers: 23
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: false
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: exp475
+ models_dir: /home/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /home/data/processed
+ raw_dir: /home/data/raw
+ run_dir: /home/models/exp477
+ run_id: exp477_seed10_fold0
+ run_name: exp477_seed10_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 10
+ tokenizer: null
+ tokenizer_path: /home/models/exp477/tokenizer
+ training:
+   apex: true
+   epochs: 1
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
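Unlike the earlier runs, exp477 (and exp478 below) warm-starts from exp475 and fine-tunes only the top of the network: embeddings frozen and the first 23 (respectively 20) encoder layers frozen. The CustomModel internals are not part of this upload, so the attribute names below are assumptions based on transformers' DebertaV2Model, but a sketch of how that freezing is typically implemented:

```python
from transformers import DebertaV2Model


def freeze_backbone(backbone: DebertaV2Model, freeze_embeddings: bool, freeze_n_layers: int) -> None:
    """Disable gradients for the embeddings and the first N encoder layers."""
    if freeze_embeddings:
        for p in backbone.embeddings.parameters():
            p.requires_grad = False
    for layer in backbone.encoder.layer[:freeze_n_layers]:
        for p in layer.parameters():
            p.requires_grad = False


backbone = DebertaV2Model.from_pretrained("microsoft/deberta-v3-large")
freeze_backbone(backbone, freeze_embeddings=True, freeze_n_layers=23)  # exp477 settings
trainable = sum(p.numel() for p in backbone.parameters() if p.requires_grad)
print(f"trainable params: {trainable:,}")  # only the last encoder layer remains trainable
```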
configs/exp478.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /home/models/exp478
+ best_model_path: /home/models/exp478/models/fold_0_10_best.pth
+ checkpoint_path: /home/models/exp478/chkp/fold_0_10_chkp.pth
+ config_path: /home/models/exp478/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /home/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 1024
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 6
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 6
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp478_seed10
+ external_dir: /home/data/external
+ fold: 0
+ interim_dir: /home/data/interim
+ log_path: /home/models/exp478/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: true
+   freeze_n_layers: 20
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: false
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: exp475
+ models_dir: /home/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /home/data/processed
+ raw_dir: /home/data/raw
+ run_dir: /home/models/exp478
+ run_id: exp478_seed10_fold0
+ run_name: exp478_seed10_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 10
+ tokenizer: null
+ tokenizer_path: /home/models/exp478/tokenizer
+ training:
+   apex: true
+   epochs: 1
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp489.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /llm-daig/models/exp489
+ best_model_path: /llm-daig/models/exp489/models/fold_0_10_best.pth
+ checkpoint_path: /llm-daig/models/exp489/chkp/fold_0_10_chkp.pth
+ config_path: /llm-daig/models/exp489/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /llm-daig/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 256
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 48
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 48
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp489_seed10
+ external_dir: /llm-daig/data/external
+ fold: 0
+ interim_dir: /llm-daig/data/interim
+ log_path: /llm-daig/models/exp489/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: false
+   freeze_n_layers: 0
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: false
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: None
+ models_dir: /llm-daig/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /llm-daig/data/processed
+ raw_dir: /llm-daig/data/raw
+ run_dir: /llm-daig/models/exp489
+ run_id: exp489_seed10_fold0
+ run_name: exp489_seed10_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 10
+ tokenizer: null
+ tokenizer_path: /llm-daig/models/exp489/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 4
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp492.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp492
+ best_model_path: /notebooks/models/exp492/models/fold_0_10_best.pth
+ checkpoint_path: /notebooks/models/exp492/chkp/fold_0_10_chkp.pth
+ config_path: /notebooks/models/exp492/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 1024
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 6
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 6
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp492_seed10
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp492/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: true
+   freeze_n_layers: 23
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: false
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: exp489
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp492
+ run_id: exp492_seed10_fold0
+ run_name: exp492_seed10_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 10
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp492/tokenizer
+ training:
+   apex: true
+   epochs: 1
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp500.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /work/models/exp500
+ best_model_path: /work/models/exp500/models/fold_0_42_best.pth
+ checkpoint_path: /work/models/exp500/chkp/fold_0_42_chkp.pth
+ config_path: /work/models/exp500/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /work/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 384
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 36
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 36
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp500_seed42
+ external_dir: /work/data/external
+ fold: 0
+ interim_dir: /work/data/interim
+ log_path: /work/models/exp500/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.0
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.0
+   freeze_embeddings: false
+   freeze_n_layers: 0
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: None
+ models_dir: /work/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /work/data/processed
+ raw_dir: /work/data/raw
+ run_dir: /work/models/exp500
+ run_id: exp500_seed42_fold0
+ run_name: exp500_seed42_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 42
+ tokenizer: null
+ tokenizer_path: /work/models/exp500/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 16
+   gradient_accumulation_steps: 1
+   max_grad_norm: 10
configs/exp510.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp510
+ best_model_path: /notebooks/models/exp510/models/fold_0_42_best.pth
+ checkpoint_path: /notebooks/models/exp510/chkp/fold_0_42_chkp.pth
+ config_path: /notebooks/models/exp510/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 1024
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 6
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 6
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp510_seed42
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp510/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.0
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.0
+   freeze_embeddings: true
+   freeze_n_layers: 23
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: exp489
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 0.0
+   encoder_lr: 1.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp510
+ run_id: exp510_seed42_fold0
+ run_name: exp510_seed42_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 42
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp510/tokenizer
+ training:
+   apex: true
+   epochs: 1
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
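exp510 is the only config with distinct per-group learning rates (embeddings_lr 0.0, encoder_lr 1.0e-05, decoder_lr 2.0e-05). How CustomModel splits "encoder" from "decoder" parameters is not visible here, so the grouping below is an assumption; a sketch of wiring grouped learning rates into AdamW together with the cosine_schedule_with_warmup scheduler the configs name:

```python
import torch
import torch.nn as nn
from transformers import DebertaV2Model, get_cosine_schedule_with_warmup

backbone = DebertaV2Model.from_pretrained("microsoft/deberta-v3-large")
head = nn.Linear(backbone.config.hidden_size, 1)  # stand-in for the model's head ("decoder")

# Grouped learning rates matching exp510's optimizer block.
param_groups = [
    {"params": backbone.embeddings.parameters(), "lr": 0.0},     # embeddings_lr: 0.0
    {"params": backbone.encoder.parameters(), "lr": 1.0e-5},     # encoder_lr: 1.0e-05
    {"params": head.parameters(), "lr": 2.0e-5},                 # decoder_lr: 2.0e-05
]
optimizer = torch.optim.AdamW(param_groups, betas=(0.9, 0.999), eps=1e-6, weight_decay=0.01)

num_training_steps = 1000  # placeholder; in practice epochs * batches_per_epoch
scheduler = get_cosine_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_training_steps, num_cycles=0.5
)
```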
configs/exp511.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp511
+ best_model_path: /notebooks/models/exp511/models/fold_0_42_best.pth
+ checkpoint_path: /notebooks/models/exp511/chkp/fold_0_42_chkp.pth
+ config_path: /notebooks/models/exp511/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 1024
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 6
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 6
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp511_seed42
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp511/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: true
+   freeze_n_layers: 23
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: exp507
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp511
+ run_id: exp511_seed42_fold0
+ run_name: exp511_seed42_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 42
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp511/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
configs/exp512.yaml ADDED
@@ -0,0 +1,106 @@
+ backbone_config_path: /notebooks/models/exp512
+ best_model_path: /notebooks/models/exp512/models/fold_0_2024_best.pth
+ checkpoint_path: /notebooks/models/exp512/chkp/fold_0_2024_chkp.pth
+ config_path: /notebooks/models/exp512/config.yaml
+ criterion:
+   criterion_type: BCEWithLogitsLoss
+   mcrmse_loss:
+     weights:
+     - 0.5
+     - 0.5
+   mse_loss:
+     reduction: mean
+   rmse_loss:
+     eps: 1.0e-09
+     reduction: mean
+   smooth_l1_loss:
+     beta: 0.1
+     reduction: mean
+ data_dir: /notebooks/data
+ dataset:
+   bucket_batch_sampler:
+     bucket_size: 400
+     noise_factor: 0.2
+   folds: true
+   labels:
+   - generated
+   max_length: 1024
+   sampler_type: StratifiedBatchSampler
+   train_batch_size: 6
+   train_sources:
+   - daigt
+   - persuade
+   - persuade_gpt
+   - persuade_humanized_1
+   - persuade_gpt_patially_rewritten
+   - persuade_gpt_patially_rewritten_05
+   - persuade_humanized_easy_1
+   - daigt_gpt_patially_rewritten
+   - llama-mistral-partially-r
+   - moth
+   - books
+   - neural-chat-7b
+   - nbroad
+   valid_batch_size: 6
+   valid_sources:
+   - none
+ debug: false
+ exp_name: exp512_seed2024
+ external_dir: /notebooks/data/external
+ fold: 0
+ interim_dir: /notebooks/data/interim
+ log_path: /notebooks/models/exp512/logs/fold-0.log
+ logger:
+   job_type: training
+   project: DAIGT-AIE
+   train_print_frequency: 100
+   use_wandb: true
+   valid_print_frequency: 100
+ model:
+   architecture_type: CustomModel
+   attention_dropout: 0.1
+   backbone_type: microsoft/deberta-v3-large
+   dropout: 0.05
+   freeze_embeddings: true
+   freeze_n_layers: 23
+   gem_pooling:
+     eps: 1.0e-06
+     p: 3
+   gradient_checkpointing: false
+   load_embeddings: true
+   load_head: true
+   load_n_layers: 24
+   load_parts: true
+   pooling_type: MeanPooling
+   reinitialize_n_layers: 0
+   state_from_model: exp489
+ models_dir: /notebooks/models
+ optimizer:
+   beta1: 0.9
+   beta2: 0.999
+   decoder_lr: 2.0e-05
+   embeddings_lr: 2.0e-05
+   encoder_lr: 2.0e-05
+   eps: 1.0e-06
+   group_lr_multiplier: 1
+   n_groups: 1
+   weight_decay: 0.01
+ processed_dir: /notebooks/data/processed
+ raw_dir: /notebooks/data/raw
+ run_dir: /notebooks/models/exp512
+ run_id: exp512_seed2024_fold0
+ run_name: exp512_seed2024_fold0
+ scheduler:
+   cosine_schedule_with_warmup:
+     n_cycles: 0.5
+     n_warmup_steps: 0
+   type: cosine_schedule_with_warmup
+ seed: 2024
+ tokenizer: null
+ tokenizer_path: /notebooks/models/exp512/tokenizer
+ training:
+   apex: true
+   epochs: 3
+   evaluate_n_times_per_epoch: 1
+   gradient_accumulation_steps: 1
+   max_grad_norm: 1000
models/exp179_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:739fd06d3a687dce1a762c3300aa0b071bd548768a7cb83a950873c4bd8e3a6b
+ size 1735825398
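The models/*.pth entries are Git LFS pointer files: the repo itself stores only the object hash and size, and `git lfs pull` (or the huggingface_hub download helpers) fetches the ~1.7 GB payloads. The checkpoint layout is not documented in this upload, so treating each file as a pickled state dict is an assumption; a minimal inspection sketch:

```python
import torch

# Assumes the LFS payload has already been fetched (e.g. via `git lfs pull`).
state = torch.load("models/exp179_weights.pth", map_location="cpu")

# Inspect the top-level keys before trying to load into a model.
if isinstance(state, dict):
    for key in list(state)[:5]:
        print(key)
```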
models/exp184_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e4f5d832fe53c3543591ab26079f85df6a9343458299be8693a9aed540f7b46
+ size 1735825398
models/exp200_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d37b2f2b0d6d971aa1c1912c7eb0a4f687ab561652072eeec1605e8672457ae4
+ size 1735825142
models/exp222_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:018b08f29fdba158f80b9adede4c4b2bfa6ab499c690cd257ed05aaab372192d
+ size 1735815798
models/exp477_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b3efefc5aacf24be7387c8402f67e4f8b29422df457455653e5f7ea6b9978d9
+ size 1735823606
models/exp478_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d3ff6b6381aefc4408bdf00320ac0d0197539f4cb50af628db08fb27763ae6a2
+ size 1735823734
models/exp489_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd1541ebd70323ae99f89ff85947a9fe5749f97d7ed9441a94a4af510c70b81f
+ size 1735840182
models/exp492_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9035bda67b861329a5ac9e847a781c67f81bcc4fd37bdd8740c55c03a08bd18
+ size 1735824886
models/exp500_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f718602def5bc201f878ef487c4ba7941b4eff01a3a961b4982c2ed29c34886
+ size 1735839798
models/exp510_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f3a0f1752b937d5324957eb3303f6243333b382fe81c2fb159126d321194009
+ size 1735823734
models/exp511_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c737da2b582bb7c76404582261ae9b5593282be1264b970e96d885a98bae5188
+ size 1735824822
models/exp512_weights.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fcfb80b7299b1beabdfd573c68683a4da3999d2d6374afd6bab7b174f55aa1d1
+ size 1735824822
tokenizer/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[MASK]": 128000
+ }
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "[CLS]",
+   "cls_token": "[CLS]",
+   "eos_token": "[SEP]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer/spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "128000": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "[CLS]",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "eos_token": "[SEP]",
+   "mask_token": "[MASK]",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "sp_model_kwargs": {},
+   "split_by_punct": false,
+   "tokenizer_class": "DebertaV2Tokenizer",
+   "unk_token": "[UNK]",
+   "vocab_type": "spm"
+ }
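The tokenizer/ folder is a standard DebertaV2Tokenizer export (SentencePiece model plus [MASK] re-added at id 128000), so it should load directly from the local directory; a minimal sketch (requires the sentencepiece package):

```python
from transformers import AutoTokenizer

# Local path to this repo's tokenizer folder.
tokenizer = AutoTokenizer.from_pretrained("tokenizer")

ids = tokenizer("A quick smoke test.")["input_ids"]
print(ids)
print(tokenizer.mask_token, tokenizer.mask_token_id)  # [MASK] 128000
```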