AlekseyKorshuk
committed on
Commit
•
f59a1a3
1
Parent(s):
2c9e29c
Upload folder using huggingface_hub
Browse files
- backbone_configs/exp179.json +33 -0
- backbone_configs/exp184.json +33 -0
- backbone_configs/exp200.json +33 -0
- backbone_configs/exp222.json +33 -0
- backbone_configs/exp477.json +33 -0
- backbone_configs/exp478.json +33 -0
- backbone_configs/exp489.json +33 -0
- backbone_configs/exp492.json +33 -0
- backbone_configs/exp500.json +33 -0
- backbone_configs/exp510.json +33 -0
- backbone_configs/exp511.json +33 -0
- backbone_configs/exp512.json +33 -0
- configs/exp179.yaml +106 -0
- configs/exp184.yaml +106 -0
- configs/exp200.yaml +106 -0
- configs/exp222.yaml +106 -0
- configs/exp477.yaml +106 -0
- configs/exp478.yaml +106 -0
- configs/exp489.yaml +106 -0
- configs/exp492.yaml +106 -0
- configs/exp500.yaml +106 -0
- configs/exp510.yaml +106 -0
- configs/exp511.yaml +106 -0
- configs/exp512.yaml +106 -0
- models/exp179_weights.pth +3 -0
- models/exp184_weights.pth +3 -0
- models/exp200_weights.pth +3 -0
- models/exp222_weights.pth +3 -0
- models/exp477_weights.pth +3 -0
- models/exp478_weights.pth +3 -0
- models/exp489_weights.pth +3 -0
- models/exp492_weights.pth +3 -0
- models/exp500_weights.pth +3 -0
- models/exp510_weights.pth +3 -0
- models/exp511_weights.pth +3 -0
- models/exp512_weights.pth +3 -0
- tokenizer/added_tokens.json +3 -0
- tokenizer/special_tokens_map.json +15 -0
- tokenizer/spm.model +3 -0
- tokenizer/tokenizer_config.json +58 -0
backbone_configs/exp179.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.34.1",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128001
|
33 |
+
}
|
backbone_configs/exp184.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.34.1",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128001
|
33 |
+
}
|
backbone_configs/exp200.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.05,
|
4 |
+
"attention_probs_dropout_prob": 0.05,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.34.1",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128001
|
33 |
+
}
|
backbone_configs/exp222.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.34.1",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128001
|
33 |
+
}
|
backbone_configs/exp477.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp478.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp489.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp492.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp500.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.0,
|
4 |
+
"attention_probs_dropout_prob": 0.0,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.0,
|
7 |
+
"hidden_dropout_prob": 0.0,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp510.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.0,
|
4 |
+
"attention_probs_dropout_prob": 0.0,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.0,
|
7 |
+
"hidden_dropout_prob": 0.0,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp511.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
backbone_configs/exp512.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "microsoft/deberta-v3-large",
|
3 |
+
"attention_dropout": 0.1,
|
4 |
+
"attention_probs_dropout_prob": 0.1,
|
5 |
+
"hidden_act": "gelu",
|
6 |
+
"hidden_dropout": 0.05,
|
7 |
+
"hidden_dropout_prob": 0.05,
|
8 |
+
"hidden_size": 1024,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 4096,
|
11 |
+
"layer_norm_eps": 1e-07,
|
12 |
+
"max_position_embeddings": 512,
|
13 |
+
"max_relative_positions": -1,
|
14 |
+
"model_type": "deberta-v2",
|
15 |
+
"norm_rel_ebd": "layer_norm",
|
16 |
+
"num_attention_heads": 16,
|
17 |
+
"num_hidden_layers": 24,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"pooler_dropout": 0,
|
20 |
+
"pooler_hidden_act": "gelu",
|
21 |
+
"pooler_hidden_size": 1024,
|
22 |
+
"pos_att_type": [
|
23 |
+
"p2c",
|
24 |
+
"c2p"
|
25 |
+
],
|
26 |
+
"position_biased_input": false,
|
27 |
+
"position_buckets": 256,
|
28 |
+
"relative_attention": true,
|
29 |
+
"share_att_key": true,
|
30 |
+
"transformers_version": "4.36.2",
|
31 |
+
"type_vocab_size": 0,
|
32 |
+
"vocab_size": 128100
|
33 |
+
}
|
configs/exp179.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp179
|
2 |
+
best_model_path: /notebooks/models/exp179/models/fold_1_42_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp179/chkp/fold_1_42_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp179/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 256
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 48
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 48
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp179_seed42
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 1
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp179/logs/fold-1.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: false
|
65 |
+
freeze_n_layers: 0
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: false
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: None
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp179
|
91 |
+
run_id: exp179_seed42_fold1
|
92 |
+
run_name: exp179_seed42_fold1
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 42
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp179/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp184.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp184
|
2 |
+
best_model_path: /notebooks/models/exp184/models/fold_0_42_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp184/chkp/fold_0_42_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp184/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 256
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 48
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 48
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp184_seed42
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp184/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 20
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 20
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: false
|
65 |
+
freeze_n_layers: 0
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: false
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: None
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp184
|
91 |
+
run_id: exp184_seed42_fold0
|
92 |
+
run_name: exp184_seed42_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 42
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp184/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 4
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp200.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp200
|
2 |
+
best_model_path: /notebooks/models/exp200/models/fold_0_42_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp200/chkp/fold_0_42_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp200/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 512
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 12
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 12
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp200_seed42
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp200/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 20
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 20
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.05
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: false
|
65 |
+
freeze_n_layers: 0
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: false
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: None
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 1.5e-05
|
82 |
+
embeddings_lr: 1.5e-05
|
83 |
+
encoder_lr: 1.5e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp200
|
91 |
+
run_id: exp200_seed42_fold0
|
92 |
+
run_name: exp200_seed42_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 42
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp200/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 4
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp222.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp222
|
2 |
+
best_model_path: /notebooks/models/exp222/models/fold_0_2023_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp222/chkp/fold_0_2023_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp222/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 256
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 48
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 48
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp222_seed2023
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp222/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: false
|
65 |
+
freeze_n_layers: 0
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: false
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: None
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp222
|
91 |
+
run_id: exp222_seed2023_fold0
|
92 |
+
run_name: exp222_seed2023_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 2023
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp222/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 4
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp477.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /home/models/exp477
|
2 |
+
best_model_path: /home/models/exp477/models/fold_0_10_best.pth
|
3 |
+
checkpoint_path: /home/models/exp477/chkp/fold_0_10_chkp.pth
|
4 |
+
config_path: /home/models/exp477/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /home/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 1024
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 6
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 6
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp477_seed10
|
49 |
+
external_dir: /home/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /home/data/interim
|
52 |
+
log_path: /home/models/exp477/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: true
|
65 |
+
freeze_n_layers: 23
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: false
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: exp475
|
77 |
+
models_dir: /home/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /home/data/processed
|
89 |
+
raw_dir: /home/data/raw
|
90 |
+
run_dir: /home/models/exp477
|
91 |
+
run_id: exp477_seed10_fold0
|
92 |
+
run_name: exp477_seed10_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 10
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /home/models/exp477/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 1
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp478.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /home/models/exp478
|
2 |
+
best_model_path: /home/models/exp478/models/fold_0_10_best.pth
|
3 |
+
checkpoint_path: /home/models/exp478/chkp/fold_0_10_chkp.pth
|
4 |
+
config_path: /home/models/exp478/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /home/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 1024
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 6
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 6
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp478_seed10
|
49 |
+
external_dir: /home/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /home/data/interim
|
52 |
+
log_path: /home/models/exp478/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: true
|
65 |
+
freeze_n_layers: 20
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: false
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: exp475
|
77 |
+
models_dir: /home/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /home/data/processed
|
89 |
+
raw_dir: /home/data/raw
|
90 |
+
run_dir: /home/models/exp478
|
91 |
+
run_id: exp478_seed10_fold0
|
92 |
+
run_name: exp478_seed10_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 10
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /home/models/exp478/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 1
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp489.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /llm-daig/models/exp489
|
2 |
+
best_model_path: /llm-daig/models/exp489/models/fold_0_10_best.pth
|
3 |
+
checkpoint_path: /llm-daig/models/exp489/chkp/fold_0_10_chkp.pth
|
4 |
+
config_path: /llm-daig/models/exp489/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /llm-daig/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 256
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 48
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 48
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp489_seed10
|
49 |
+
external_dir: /llm-daig/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /llm-daig/data/interim
|
52 |
+
log_path: /llm-daig/models/exp489/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: false
|
65 |
+
freeze_n_layers: 0
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: false
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: None
|
77 |
+
models_dir: /llm-daig/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /llm-daig/data/processed
|
89 |
+
raw_dir: /llm-daig/data/raw
|
90 |
+
run_dir: /llm-daig/models/exp489
|
91 |
+
run_id: exp489_seed10_fold0
|
92 |
+
run_name: exp489_seed10_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 10
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /llm-daig/models/exp489/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 4
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp492.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp492
|
2 |
+
best_model_path: /notebooks/models/exp492/models/fold_0_10_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp492/chkp/fold_0_10_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp492/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 1024
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 6
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 6
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp492_seed10
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp492/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: true
|
65 |
+
freeze_n_layers: 23
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: false
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: exp489
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp492
|
91 |
+
run_id: exp492_seed10_fold0
|
92 |
+
run_name: exp492_seed10_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 10
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp492/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 1
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp500.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /work/models/exp500
|
2 |
+
best_model_path: /work/models/exp500/models/fold_0_42_best.pth
|
3 |
+
checkpoint_path: /work/models/exp500/chkp/fold_0_42_chkp.pth
|
4 |
+
config_path: /work/models/exp500/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /work/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 384
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 36
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 36
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp500_seed42
|
49 |
+
external_dir: /work/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /work/data/interim
|
52 |
+
log_path: /work/models/exp500/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.0
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.0
|
64 |
+
freeze_embeddings: false
|
65 |
+
freeze_n_layers: 0
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: None
|
77 |
+
models_dir: /work/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /work/data/processed
|
89 |
+
raw_dir: /work/data/raw
|
90 |
+
run_dir: /work/models/exp500
|
91 |
+
run_id: exp500_seed42_fold0
|
92 |
+
run_name: exp500_seed42_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 42
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /work/models/exp500/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 16
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 10
|
configs/exp510.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp510
|
2 |
+
best_model_path: /notebooks/models/exp510/models/fold_0_42_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp510/chkp/fold_0_42_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp510/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 1024
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 6
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 6
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp510_seed42
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp510/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.0
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.0
|
64 |
+
freeze_embeddings: true
|
65 |
+
freeze_n_layers: 23
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: exp489
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 0.0
|
83 |
+
encoder_lr: 1.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp510
|
91 |
+
run_id: exp510_seed42_fold0
|
92 |
+
run_name: exp510_seed42_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 42
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp510/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 1
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp511.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp511
|
2 |
+
best_model_path: /notebooks/models/exp511/models/fold_0_42_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp511/chkp/fold_0_42_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp511/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 1024
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 6
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 6
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp511_seed42
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp511/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: true
|
65 |
+
freeze_n_layers: 23
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: exp507
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp511
|
91 |
+
run_id: exp511_seed42_fold0
|
92 |
+
run_name: exp511_seed42_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 42
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp511/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
configs/exp512.yaml
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
backbone_config_path: /notebooks/models/exp512
|
2 |
+
best_model_path: /notebooks/models/exp512/models/fold_0_2024_best.pth
|
3 |
+
checkpoint_path: /notebooks/models/exp512/chkp/fold_0_2024_chkp.pth
|
4 |
+
config_path: /notebooks/models/exp512/config.yaml
|
5 |
+
criterion:
|
6 |
+
criterion_type: BCEWithLogitsLoss
|
7 |
+
mcrmse_loss:
|
8 |
+
weights:
|
9 |
+
- 0.5
|
10 |
+
- 0.5
|
11 |
+
mse_loss:
|
12 |
+
reduction: mean
|
13 |
+
rmse_loss:
|
14 |
+
eps: 1.0e-09
|
15 |
+
reduction: mean
|
16 |
+
smooth_l1_loss:
|
17 |
+
beta: 0.1
|
18 |
+
reduction: mean
|
19 |
+
data_dir: /notebooks/data
|
20 |
+
dataset:
|
21 |
+
bucket_batch_sampler:
|
22 |
+
bucket_size: 400
|
23 |
+
noise_factor: 0.2
|
24 |
+
folds: true
|
25 |
+
labels:
|
26 |
+
- generated
|
27 |
+
max_length: 1024
|
28 |
+
sampler_type: StratifiedBatchSampler
|
29 |
+
train_batch_size: 6
|
30 |
+
train_sources:
|
31 |
+
- daigt
|
32 |
+
- persuade
|
33 |
+
- persuade_gpt
|
34 |
+
- persuade_humanized_1
|
35 |
+
- persuade_gpt_patially_rewritten
|
36 |
+
- persuade_gpt_patially_rewritten_05
|
37 |
+
- persuade_humanized_easy_1
|
38 |
+
- daigt_gpt_patially_rewritten
|
39 |
+
- llama-mistral-partially-r
|
40 |
+
- moth
|
41 |
+
- books
|
42 |
+
- neural-chat-7b
|
43 |
+
- nbroad
|
44 |
+
valid_batch_size: 6
|
45 |
+
valid_sources:
|
46 |
+
- none
|
47 |
+
debug: false
|
48 |
+
exp_name: exp512_seed2024
|
49 |
+
external_dir: /notebooks/data/external
|
50 |
+
fold: 0
|
51 |
+
interim_dir: /notebooks/data/interim
|
52 |
+
log_path: /notebooks/models/exp512/logs/fold-0.log
|
53 |
+
logger:
|
54 |
+
job_type: training
|
55 |
+
project: DAIGT-AIE
|
56 |
+
train_print_frequency: 100
|
57 |
+
use_wandb: true
|
58 |
+
valid_print_frequency: 100
|
59 |
+
model:
|
60 |
+
architecture_type: CustomModel
|
61 |
+
attention_dropout: 0.1
|
62 |
+
backbone_type: microsoft/deberta-v3-large
|
63 |
+
dropout: 0.05
|
64 |
+
freeze_embeddings: true
|
65 |
+
freeze_n_layers: 23
|
66 |
+
gem_pooling:
|
67 |
+
eps: 1.0e-06
|
68 |
+
p: 3
|
69 |
+
gradient_checkpointing: false
|
70 |
+
load_embeddings: true
|
71 |
+
load_head: true
|
72 |
+
load_n_layers: 24
|
73 |
+
load_parts: true
|
74 |
+
pooling_type: MeanPooling
|
75 |
+
reinitialize_n_layers: 0
|
76 |
+
state_from_model: exp489
|
77 |
+
models_dir: /notebooks/models
|
78 |
+
optimizer:
|
79 |
+
beta1: 0.9
|
80 |
+
beta2: 0.999
|
81 |
+
decoder_lr: 2.0e-05
|
82 |
+
embeddings_lr: 2.0e-05
|
83 |
+
encoder_lr: 2.0e-05
|
84 |
+
eps: 1.0e-06
|
85 |
+
group_lr_multiplier: 1
|
86 |
+
n_groups: 1
|
87 |
+
weight_decay: 0.01
|
88 |
+
processed_dir: /notebooks/data/processed
|
89 |
+
raw_dir: /notebooks/data/raw
|
90 |
+
run_dir: /notebooks/models/exp512
|
91 |
+
run_id: exp512_seed2024_fold0
|
92 |
+
run_name: exp512_seed2024_fold0
|
93 |
+
scheduler:
|
94 |
+
cosine_schedule_with_warmup:
|
95 |
+
n_cycles: 0.5
|
96 |
+
n_warmup_steps: 0
|
97 |
+
type: cosine_schedule_with_warmup
|
98 |
+
seed: 2024
|
99 |
+
tokenizer: null
|
100 |
+
tokenizer_path: /notebooks/models/exp512/tokenizer
|
101 |
+
training:
|
102 |
+
apex: true
|
103 |
+
epochs: 3
|
104 |
+
evaluate_n_times_per_epoch: 1
|
105 |
+
gradient_accumulation_steps: 1
|
106 |
+
max_grad_norm: 1000
|
models/exp179_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:739fd06d3a687dce1a762c3300aa0b071bd548768a7cb83a950873c4bd8e3a6b
|
3 |
+
size 1735825398
|
models/exp184_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e4f5d832fe53c3543591ab26079f85df6a9343458299be8693a9aed540f7b46
|
3 |
+
size 1735825398
|
models/exp200_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d37b2f2b0d6d971aa1c1912c7eb0a4f687ab561652072eeec1605e8672457ae4
|
3 |
+
size 1735825142
|
models/exp222_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:018b08f29fdba158f80b9adede4c4b2bfa6ab499c690cd257ed05aaab372192d
|
3 |
+
size 1735815798
|
models/exp477_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b3efefc5aacf24be7387c8402f67e4f8b29422df457455653e5f7ea6b9978d9
|
3 |
+
size 1735823606
|
models/exp478_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3ff6b6381aefc4408bdf00320ac0d0197539f4cb50af628db08fb27763ae6a2
|
3 |
+
size 1735823734
|
models/exp489_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd1541ebd70323ae99f89ff85947a9fe5749f97d7ed9441a94a4af510c70b81f
|
3 |
+
size 1735840182
|
models/exp492_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9035bda67b861329a5ac9e847a781c67f81bcc4fd37bdd8740c55c03a08bd18
|
3 |
+
size 1735824886
|
models/exp500_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f718602def5bc201f878ef487c4ba7941b4eff01a3a961b4982c2ed29c34886
|
3 |
+
size 1735839798
|
models/exp510_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f3a0f1752b937d5324957eb3303f6243333b382fe81c2fb159126d321194009
|
3 |
+
size 1735823734
|
models/exp511_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c737da2b582bb7c76404582261ae9b5593282be1264b970e96d885a98bae5188
|
3 |
+
size 1735824822
|
models/exp512_weights.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fcfb80b7299b1beabdfd573c68683a4da3999d2d6374afd6bab7b174f55aa1d1
|
3 |
+
size 1735824822
|
tokenizer/added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"[MASK]": 128000
|
3 |
+
}
|
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "[CLS]",
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"eos_token": "[SEP]",
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"pad_token": "[PAD]",
|
7 |
+
"sep_token": "[SEP]",
|
8 |
+
"unk_token": {
|
9 |
+
"content": "[UNK]",
|
10 |
+
"lstrip": false,
|
11 |
+
"normalized": true,
|
12 |
+
"rstrip": false,
|
13 |
+
"single_word": false
|
14 |
+
}
|
15 |
+
}
|
tokenizer/spm.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
|
3 |
+
size 2464616
|
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "[CLS]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "[SEP]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"3": {
|
28 |
+
"content": "[UNK]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"128000": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"bos_token": "[CLS]",
|
45 |
+
"clean_up_tokenization_spaces": true,
|
46 |
+
"cls_token": "[CLS]",
|
47 |
+
"do_lower_case": false,
|
48 |
+
"eos_token": "[SEP]",
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"model_max_length": 1000000000000000019884624838656,
|
51 |
+
"pad_token": "[PAD]",
|
52 |
+
"sep_token": "[SEP]",
|
53 |
+
"sp_model_kwargs": {},
|
54 |
+
"split_by_punct": false,
|
55 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
56 |
+
"unk_token": "[UNK]",
|
57 |
+
"vocab_type": "spm"
|
58 |
+
}
|