3v324v23 committed on
Commit
bcba58a
1 Parent(s): f4bdf0f

remove unused checkpoints

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. baseline_bs4_top1.log +0 -0
  2. baseline_bs4_top1/checkpoint-10000/config.json +0 -62
  3. baseline_bs4_top1/checkpoint-10000/generation_config.json +0 -7
  4. baseline_bs4_top1/checkpoint-10000/optimizer.pt +0 -3
  5. baseline_bs4_top1/checkpoint-10000/pytorch_model.bin +0 -3
  6. baseline_bs4_top1/checkpoint-10000/rng_state.pth +0 -3
  7. baseline_bs4_top1/checkpoint-10000/scheduler.pt +0 -3
  8. baseline_bs4_top1/checkpoint-10000/special_tokens_map.json +0 -107
  9. baseline_bs4_top1/checkpoint-10000/spiece.model +0 -3
  10. baseline_bs4_top1/checkpoint-10000/tokenizer.json +0 -0
  11. baseline_bs4_top1/checkpoint-10000/tokenizer_config.json +0 -112
  12. baseline_bs4_top1/checkpoint-10000/trainer_state.json +0 -139
  13. baseline_bs4_top1/checkpoint-10000/training_args.bin +0 -3
  14. baseline_bs4_top1/checkpoint-20000/config.json +0 -62
  15. baseline_bs4_top1/checkpoint-20000/generation_config.json +0 -7
  16. baseline_bs4_top1/checkpoint-20000/optimizer.pt +0 -3
  17. baseline_bs4_top1/checkpoint-20000/pytorch_model.bin +0 -3
  18. baseline_bs4_top1/checkpoint-20000/rng_state.pth +0 -3
  19. baseline_bs4_top1/checkpoint-20000/scheduler.pt +0 -3
  20. baseline_bs4_top1/checkpoint-20000/special_tokens_map.json +0 -107
  21. baseline_bs4_top1/checkpoint-20000/spiece.model +0 -3
  22. baseline_bs4_top1/checkpoint-20000/tokenizer.json +0 -0
  23. baseline_bs4_top1/checkpoint-20000/tokenizer_config.json +0 -112
  24. baseline_bs4_top1/checkpoint-20000/trainer_state.json +0 -259
  25. baseline_bs4_top1/checkpoint-20000/training_args.bin +0 -3
  26. baseline_bs4_top1/data_config.json +0 -1
  27. baseline_bs4_top1/hfmodel_config.json +0 -1
  28. baseline_bs4_top1/model_config.json +0 -1
  29. baseline_bs4_top1/train_config.json +0 -1
  30. baseline_bs4_top2.log +0 -0
  31. baseline_bs4_top2/checkpoint-10000/config.json +0 -62
  32. baseline_bs4_top2/checkpoint-10000/generation_config.json +0 -7
  33. baseline_bs4_top2/checkpoint-10000/optimizer.pt +0 -3
  34. baseline_bs4_top2/checkpoint-10000/pytorch_model.bin +0 -3
  35. baseline_bs4_top2/checkpoint-10000/rng_state.pth +0 -3
  36. baseline_bs4_top2/checkpoint-10000/scheduler.pt +0 -3
  37. baseline_bs4_top2/checkpoint-10000/special_tokens_map.json +0 -107
  38. baseline_bs4_top2/checkpoint-10000/spiece.model +0 -3
  39. baseline_bs4_top2/checkpoint-10000/tokenizer.json +0 -0
  40. baseline_bs4_top2/checkpoint-10000/tokenizer_config.json +0 -112
  41. baseline_bs4_top2/checkpoint-10000/trainer_state.json +0 -139
  42. baseline_bs4_top2/checkpoint-10000/training_args.bin +0 -3
  43. baseline_bs4_top2/checkpoint-20000/config.json +0 -62
  44. baseline_bs4_top2/checkpoint-20000/generation_config.json +0 -7
  45. baseline_bs4_top2/checkpoint-20000/optimizer.pt +0 -3
  46. baseline_bs4_top2/checkpoint-20000/pytorch_model.bin +0 -3
  47. baseline_bs4_top2/checkpoint-20000/rng_state.pth +0 -3
  48. baseline_bs4_top2/checkpoint-20000/scheduler.pt +0 -3
  49. baseline_bs4_top2/checkpoint-20000/special_tokens_map.json +0 -107
  50. baseline_bs4_top2/checkpoint-20000/spiece.model +0 -3
baseline_bs4_top1.log DELETED
The diff for this file is too large to render. See raw diff
 
baseline_bs4_top1/checkpoint-10000/config.json DELETED
@@ -1,62 +0,0 @@
1
- {
2
- "_name_or_path": "google/flan-t5-base",
3
- "architectures": [
4
- "SoftRelPromptFlanT5"
5
- ],
6
- "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
- "d_kv": 64,
9
- "d_model": 768,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "gelu_new",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
- "initializer_factor": 1.0,
16
- "is_encoder_decoder": true,
17
- "is_gated_act": true,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "t5",
20
- "n_positions": 512,
21
- "num_decoder_layers": 12,
22
- "num_heads": 12,
23
- "num_layers": 12,
24
- "output_past": true,
25
- "pad_token_id": 0,
26
- "relative_attention_max_distance": 128,
27
- "relative_attention_num_buckets": 32,
28
- "task_specific_params": {
29
- "summarization": {
30
- "early_stopping": true,
31
- "length_penalty": 2.0,
32
- "max_length": 200,
33
- "min_length": 30,
34
- "no_repeat_ngram_size": 3,
35
- "num_beams": 4,
36
- "prefix": "summarize: "
37
- },
38
- "translation_en_to_de": {
39
- "early_stopping": true,
40
- "max_length": 300,
41
- "num_beams": 4,
42
- "prefix": "translate English to German: "
43
- },
44
- "translation_en_to_fr": {
45
- "early_stopping": true,
46
- "max_length": 300,
47
- "num_beams": 4,
48
- "prefix": "translate English to French: "
49
- },
50
- "translation_en_to_ro": {
51
- "early_stopping": true,
52
- "max_length": 300,
53
- "num_beams": 4,
54
- "prefix": "translate English to Romanian: "
55
- }
56
- },
57
- "tie_word_embeddings": false,
58
- "torch_dtype": "float32",
59
- "transformers_version": "4.33.1",
60
- "use_cache": true,
61
- "vocab_size": 32128
62
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-10000/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.33.1"
7
- }
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-10000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09c0ceab0ed46acb0ec16d626a5fcc26798ea2d7a8eddcbfe151546635d969fb
3
- size 144545
 
 
 
 
baseline_bs4_top1/checkpoint-10000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e12a403413d9634b6e7804e3e2e1979f6566c15d89f196db0bc292bd6885c61
3
- size 990480513
 
 
 
 
baseline_bs4_top1/checkpoint-10000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4c87fc2dfbf5a627f8c2a0575b0effa1f233623d0165ebcd993a60952af24b
3
- size 14575
 
 
 
 
baseline_bs4_top1/checkpoint-10000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:89744af0d534dd9add5a42ebd997c43178aeb78f0f65e79af8379d8a5c11b73a
3
- size 627
 
 
 
 
baseline_bs4_top1/checkpoint-10000/special_tokens_map.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
107
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-10000/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
 
 
 
baseline_bs4_top1/checkpoint-10000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
baseline_bs4_top1/checkpoint-10000/tokenizer_config.json DELETED
@@ -1,112 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "clean_up_tokenization_spaces": true,
105
- "eos_token": "</s>",
106
- "extra_ids": 100,
107
- "model_max_length": 512,
108
- "pad_token": "<pad>",
109
- "sp_model_kwargs": {},
110
- "tokenizer_class": "T5Tokenizer",
111
- "unk_token": "<unk>"
112
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-10000/trainer_state.json DELETED
@@ -1,139 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.008527584604298755,
5
- "eval_steps": 500,
6
- "global_step": 10000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 0.01,
14
- "loss": 1.6476,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 0.01,
20
- "loss": 1.5631,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.0,
25
- "learning_rate": 0.01,
26
- "loss": 1.5854,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.0,
31
- "learning_rate": 0.01,
32
- "loss": 1.5557,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 0.0,
37
- "learning_rate": 0.01,
38
- "loss": 1.5653,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.0,
43
- "learning_rate": 0.01,
44
- "loss": 1.5457,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 0.0,
49
- "learning_rate": 0.01,
50
- "loss": 1.5552,
51
- "step": 3500
52
- },
53
- {
54
- "epoch": 0.0,
55
- "learning_rate": 0.01,
56
- "loss": 1.5559,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 0.0,
61
- "learning_rate": 0.01,
62
- "loss": 1.5465,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 0.0,
67
- "learning_rate": 0.01,
68
- "loss": 1.5481,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 0.0,
73
- "learning_rate": 0.01,
74
- "loss": 1.5311,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 0.01,
79
- "learning_rate": 0.01,
80
- "loss": 1.5356,
81
- "step": 6000
82
- },
83
- {
84
- "epoch": 0.01,
85
- "learning_rate": 0.01,
86
- "loss": 1.5502,
87
- "step": 6500
88
- },
89
- {
90
- "epoch": 0.01,
91
- "learning_rate": 0.01,
92
- "loss": 1.527,
93
- "step": 7000
94
- },
95
- {
96
- "epoch": 0.01,
97
- "learning_rate": 0.01,
98
- "loss": 1.5383,
99
- "step": 7500
100
- },
101
- {
102
- "epoch": 0.01,
103
- "learning_rate": 0.01,
104
- "loss": 1.5064,
105
- "step": 8000
106
- },
107
- {
108
- "epoch": 0.01,
109
- "learning_rate": 0.01,
110
- "loss": 1.5271,
111
- "step": 8500
112
- },
113
- {
114
- "epoch": 0.01,
115
- "learning_rate": 0.01,
116
- "loss": 1.5295,
117
- "step": 9000
118
- },
119
- {
120
- "epoch": 0.01,
121
- "learning_rate": 0.01,
122
- "loss": 1.5098,
123
- "step": 9500
124
- },
125
- {
126
- "epoch": 0.01,
127
- "learning_rate": 0.01,
128
- "loss": 1.53,
129
- "step": 10000
130
- }
131
- ],
132
- "logging_steps": 500,
133
- "max_steps": 20000,
134
- "num_train_epochs": 1,
135
- "save_steps": 10000,
136
- "total_flos": 4.829257277256499e+16,
137
- "trial_name": null,
138
- "trial_params": null
139
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-10000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09ab173dc6fc36e5747aa5255939206d9c965bbe4469338c8b96de7a0faed00a
3
- size 4539
 
 
 
 
baseline_bs4_top1/checkpoint-20000/config.json DELETED
@@ -1,62 +0,0 @@
1
- {
2
- "_name_or_path": "google/flan-t5-base",
3
- "architectures": [
4
- "SoftRelPromptFlanT5"
5
- ],
6
- "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
- "d_kv": 64,
9
- "d_model": 768,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "gelu_new",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
- "initializer_factor": 1.0,
16
- "is_encoder_decoder": true,
17
- "is_gated_act": true,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "t5",
20
- "n_positions": 512,
21
- "num_decoder_layers": 12,
22
- "num_heads": 12,
23
- "num_layers": 12,
24
- "output_past": true,
25
- "pad_token_id": 0,
26
- "relative_attention_max_distance": 128,
27
- "relative_attention_num_buckets": 32,
28
- "task_specific_params": {
29
- "summarization": {
30
- "early_stopping": true,
31
- "length_penalty": 2.0,
32
- "max_length": 200,
33
- "min_length": 30,
34
- "no_repeat_ngram_size": 3,
35
- "num_beams": 4,
36
- "prefix": "summarize: "
37
- },
38
- "translation_en_to_de": {
39
- "early_stopping": true,
40
- "max_length": 300,
41
- "num_beams": 4,
42
- "prefix": "translate English to German: "
43
- },
44
- "translation_en_to_fr": {
45
- "early_stopping": true,
46
- "max_length": 300,
47
- "num_beams": 4,
48
- "prefix": "translate English to French: "
49
- },
50
- "translation_en_to_ro": {
51
- "early_stopping": true,
52
- "max_length": 300,
53
- "num_beams": 4,
54
- "prefix": "translate English to Romanian: "
55
- }
56
- },
57
- "tie_word_embeddings": false,
58
- "torch_dtype": "float32",
59
- "transformers_version": "4.33.1",
60
- "use_cache": true,
61
- "vocab_size": 32128
62
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-20000/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.33.1"
7
- }
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-20000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d89bdef1fa8581fbf366465c2c48a742068cc56363ee861230021037b25a7a53
3
- size 144545
 
 
 
 
baseline_bs4_top1/checkpoint-20000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:979953b9f2b37eb669d4d0a5cdaf0a0fa69b4432d7bf17322cc56e064d696559
3
- size 990480513
 
 
 
 
baseline_bs4_top1/checkpoint-20000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ee65fd173e43a5bb96f2f07bf1e86b7666cd24f1ff7c2f132f19e39ccc7b2b9
3
- size 14575
 
 
 
 
baseline_bs4_top1/checkpoint-20000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3ddfbcd33fc0d81c222807ca3e42cd9654f7e531f573941ed9599b1e07e0373
3
- size 627
 
 
 
 
baseline_bs4_top1/checkpoint-20000/special_tokens_map.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
107
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-20000/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
 
 
 
baseline_bs4_top1/checkpoint-20000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
baseline_bs4_top1/checkpoint-20000/tokenizer_config.json DELETED
@@ -1,112 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "clean_up_tokenization_spaces": true,
105
- "eos_token": "</s>",
106
- "extra_ids": 100,
107
- "model_max_length": 512,
108
- "pad_token": "<pad>",
109
- "sp_model_kwargs": {},
110
- "tokenizer_class": "T5Tokenizer",
111
- "unk_token": "<unk>"
112
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-20000/trainer_state.json DELETED
@@ -1,259 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.01705516920859751,
5
- "eval_steps": 500,
6
- "global_step": 20000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 0.01,
14
- "loss": 1.6476,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 0.01,
20
- "loss": 1.5631,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.0,
25
- "learning_rate": 0.01,
26
- "loss": 1.5854,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.0,
31
- "learning_rate": 0.01,
32
- "loss": 1.5557,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 0.0,
37
- "learning_rate": 0.01,
38
- "loss": 1.5653,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.0,
43
- "learning_rate": 0.01,
44
- "loss": 1.5457,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 0.0,
49
- "learning_rate": 0.01,
50
- "loss": 1.5552,
51
- "step": 3500
52
- },
53
- {
54
- "epoch": 0.0,
55
- "learning_rate": 0.01,
56
- "loss": 1.5559,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 0.0,
61
- "learning_rate": 0.01,
62
- "loss": 1.5465,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 0.0,
67
- "learning_rate": 0.01,
68
- "loss": 1.5481,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 0.0,
73
- "learning_rate": 0.01,
74
- "loss": 1.5311,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 0.01,
79
- "learning_rate": 0.01,
80
- "loss": 1.5356,
81
- "step": 6000
82
- },
83
- {
84
- "epoch": 0.01,
85
- "learning_rate": 0.01,
86
- "loss": 1.5502,
87
- "step": 6500
88
- },
89
- {
90
- "epoch": 0.01,
91
- "learning_rate": 0.01,
92
- "loss": 1.527,
93
- "step": 7000
94
- },
95
- {
96
- "epoch": 0.01,
97
- "learning_rate": 0.01,
98
- "loss": 1.5383,
99
- "step": 7500
100
- },
101
- {
102
- "epoch": 0.01,
103
- "learning_rate": 0.01,
104
- "loss": 1.5064,
105
- "step": 8000
106
- },
107
- {
108
- "epoch": 0.01,
109
- "learning_rate": 0.01,
110
- "loss": 1.5271,
111
- "step": 8500
112
- },
113
- {
114
- "epoch": 0.01,
115
- "learning_rate": 0.01,
116
- "loss": 1.5295,
117
- "step": 9000
118
- },
119
- {
120
- "epoch": 0.01,
121
- "learning_rate": 0.01,
122
- "loss": 1.5098,
123
- "step": 9500
124
- },
125
- {
126
- "epoch": 0.01,
127
- "learning_rate": 0.01,
128
- "loss": 1.53,
129
- "step": 10000
130
- },
131
- {
132
- "epoch": 0.01,
133
- "learning_rate": 0.01,
134
- "loss": 1.5387,
135
- "step": 10500
136
- },
137
- {
138
- "epoch": 0.01,
139
- "learning_rate": 0.01,
140
- "loss": 1.5176,
141
- "step": 11000
142
- },
143
- {
144
- "epoch": 0.01,
145
- "learning_rate": 0.01,
146
- "loss": 1.5296,
147
- "step": 11500
148
- },
149
- {
150
- "epoch": 0.01,
151
- "learning_rate": 0.01,
152
- "loss": 1.5416,
153
- "step": 12000
154
- },
155
- {
156
- "epoch": 0.01,
157
- "learning_rate": 0.01,
158
- "loss": 1.514,
159
- "step": 12500
160
- },
161
- {
162
- "epoch": 0.01,
163
- "learning_rate": 0.01,
164
- "loss": 1.4975,
165
- "step": 13000
166
- },
167
- {
168
- "epoch": 0.01,
169
- "learning_rate": 0.01,
170
- "loss": 1.5488,
171
- "step": 13500
172
- },
173
- {
174
- "epoch": 0.01,
175
- "learning_rate": 0.01,
176
- "loss": 1.4987,
177
- "step": 14000
178
- },
179
- {
180
- "epoch": 0.01,
181
- "learning_rate": 0.01,
182
- "loss": 1.4859,
183
- "step": 14500
184
- },
185
- {
186
- "epoch": 0.01,
187
- "learning_rate": 0.01,
188
- "loss": 1.5495,
189
- "step": 15000
190
- },
191
- {
192
- "epoch": 0.01,
193
- "learning_rate": 0.01,
194
- "loss": 1.5347,
195
- "step": 15500
196
- },
197
- {
198
- "epoch": 0.01,
199
- "learning_rate": 0.01,
200
- "loss": 1.5225,
201
- "step": 16000
202
- },
203
- {
204
- "epoch": 0.01,
205
- "learning_rate": 0.01,
206
- "loss": 1.537,
207
- "step": 16500
208
- },
209
- {
210
- "epoch": 0.01,
211
- "learning_rate": 0.01,
212
- "loss": 1.512,
213
- "step": 17000
214
- },
215
- {
216
- "epoch": 0.01,
217
- "learning_rate": 0.01,
218
- "loss": 1.5263,
219
- "step": 17500
220
- },
221
- {
222
- "epoch": 0.02,
223
- "learning_rate": 0.01,
224
- "loss": 1.5188,
225
- "step": 18000
226
- },
227
- {
228
- "epoch": 0.02,
229
- "learning_rate": 0.01,
230
- "loss": 1.5163,
231
- "step": 18500
232
- },
233
- {
234
- "epoch": 0.02,
235
- "learning_rate": 0.01,
236
- "loss": 1.5372,
237
- "step": 19000
238
- },
239
- {
240
- "epoch": 0.02,
241
- "learning_rate": 0.01,
242
- "loss": 1.5225,
243
- "step": 19500
244
- },
245
- {
246
- "epoch": 0.02,
247
- "learning_rate": 0.01,
248
- "loss": 1.5114,
249
- "step": 20000
250
- }
251
- ],
252
- "logging_steps": 500,
253
- "max_steps": 20000,
254
- "num_train_epochs": 1,
255
- "save_steps": 10000,
256
- "total_flos": 9.652535293088563e+16,
257
- "trial_name": null,
258
- "trial_params": null
259
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top1/checkpoint-20000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09ab173dc6fc36e5747aa5255939206d9c965bbe4469338c8b96de7a0faed00a
3
- size 4539
 
 
 
 
baseline_bs4_top1/data_config.json DELETED
@@ -1 +0,0 @@
1
- {"dataset_config_name": null, "overwrite_cache": false, "preprocessing_num_workers": null, "train_file": "/home/jhju/datasets/nils.sentence.transformers/ce.minilm.hardneg.vL.jsonl", "eval_file": null, "max_p_length": 128, "max_q_length": 16, "m_negative_per_example": 4, "m_positive_per_example": 4, "random_corrupt_rate": 0.0}
 
 
baseline_bs4_top1/hfmodel_config.json DELETED
@@ -1 +0,0 @@
1
- {"model_name_or_path": "google/flan-t5-base", "config_name": "google/flan-t5-base", "tokenizer_name": "google/flan-t5-base", "cache_dir": null, "use_fast_tokenizer": true, "use_auth_token": false}
 
 
baseline_bs4_top1/model_config.json DELETED
@@ -1 +0,0 @@
1
- {"add_classification_head": false, "baseline_prefix": "{1}", "instruction_prompt": "Generate a question for the passage with relevance label: ", "instruction_prompt_idx": [6939, 2206, 3, 9, 822, 21, 8, 5454, 28, 20208, 3783, 10, 3], "pos_neg_prompt": null, "pos_neg_prompt_idx": null, "relevant_prompt": "true true true true true", "relevant_prompt_idx": [1176, 1176, 1176, 1176, 1176], "irrelevant_prompt": "false false false false false", "irrelevant_prompt_idx": [6136, 6136, 6136, 6136, 6136], "head_size": 64, "pooling": "mean", "activation": "sigmoid", "latent_size": 128, "activate_prompt_attention": true}
 
 
baseline_bs4_top1/train_config.json DELETED
@@ -1 +0,0 @@
1
- {"output_dir": "/work/jhju/readqg-baseline//baseline_bs4_top1", "overwrite_output_dir": true, "do_train": true, "do_eval": false, "do_predict": false, "evaluation_strategy": "no", "prediction_loss_only": false, "per_device_train_batch_size": 4, "per_device_eval_batch_size": 4, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "eval_delay": 0, "learning_rate": 0.01, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": 20000, "lr_scheduler_type": "constant", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": "passive", "log_level_replica": "warning", "log_on_each_node": true, "logging_dir": "./logs", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "logging_nan_inf_filter": true, "save_strategy": "steps", "save_steps": 10000, "save_total_limit": 5, "save_safetensors": false, "save_on_each_node": false, "no_cuda": false, "use_cpu": false, "use_mps_device": false, "seed": 42, "data_seed": null, "jit_mode_eval": false, "use_ipex": false, "bf16": false, "fp16": false, "fp16_opt_level": "O1", "half_precision_backend": "auto", "bf16_full_eval": false, "fp16_full_eval": false, "tf32": null, "local_rank": 0, "ddp_backend": null, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": false, "eval_steps": 500, "dataloader_num_workers": 0, "past_index": -1, "run_name": "prompt=5_batch=4_sample=top1", "disable_tqdm": false, "remove_unused_columns": false, "label_names": null, "load_best_model_at_end": false, "metric_for_best_model": null, "greater_is_better": null, "ignore_data_skip": false, "sharded_ddp": [], "fsdp": [], "fsdp_min_num_params": 0, "fsdp_config": {"min_num_params": 0, "xla": false, "xla_fsdp_grad_ckpt": false}, "fsdp_transformer_layer_cls_to_wrap": null, "deepspeed": null, "label_smoothing_factor": 0.0, "optim": 
"adamw_torch", "optim_args": null, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": ["wandb"], "ddp_find_unused_parameters": null, "ddp_bucket_cap_mb": null, "ddp_broadcast_buffers": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "hub_model_id": null, "hub_strategy": "every_save", "hub_token": null, "hub_private_repo": false, "hub_always_push": false, "gradient_checkpointing": true, "include_inputs_for_metrics": false, "fp16_backend": "auto", "push_to_hub_model_id": null, "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "auto_find_batch_size": false, "full_determinism": false, "torchdynamo": null, "ray_scope": "last", "ddp_timeout": 1800, "torch_compile": false, "torch_compile_backend": null, "torch_compile_mode": null, "dispatch_batches": null, "sortish_sampler": false, "predict_with_generate": false, "generation_max_length": null, "generation_num_beams": null, "generation_config": null, "random_init": false, "enable_unlikelihood": false, "enable_calibration": null, "calibration_margin_ngrams": null, "gamma": 1.0, "enable_similarity_loss": null, "document_wise_contrastive": false, "relevance_wise_contrastive": false, "tau": 1.0, "sample_random": true, "sample_topk": 1, "enable_vae_loss": false, "k": 0.0025, "x0": 2500, "annealing_fn": "logistic"}
 
 
baseline_bs4_top2.log DELETED
The diff for this file is too large to render. See raw diff
 
baseline_bs4_top2/checkpoint-10000/config.json DELETED
@@ -1,62 +0,0 @@
1
- {
2
- "_name_or_path": "google/flan-t5-base",
3
- "architectures": [
4
- "SoftRelPromptFlanT5"
5
- ],
6
- "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
- "d_kv": 64,
9
- "d_model": 768,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "gelu_new",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
- "initializer_factor": 1.0,
16
- "is_encoder_decoder": true,
17
- "is_gated_act": true,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "t5",
20
- "n_positions": 512,
21
- "num_decoder_layers": 12,
22
- "num_heads": 12,
23
- "num_layers": 12,
24
- "output_past": true,
25
- "pad_token_id": 0,
26
- "relative_attention_max_distance": 128,
27
- "relative_attention_num_buckets": 32,
28
- "task_specific_params": {
29
- "summarization": {
30
- "early_stopping": true,
31
- "length_penalty": 2.0,
32
- "max_length": 200,
33
- "min_length": 30,
34
- "no_repeat_ngram_size": 3,
35
- "num_beams": 4,
36
- "prefix": "summarize: "
37
- },
38
- "translation_en_to_de": {
39
- "early_stopping": true,
40
- "max_length": 300,
41
- "num_beams": 4,
42
- "prefix": "translate English to German: "
43
- },
44
- "translation_en_to_fr": {
45
- "early_stopping": true,
46
- "max_length": 300,
47
- "num_beams": 4,
48
- "prefix": "translate English to French: "
49
- },
50
- "translation_en_to_ro": {
51
- "early_stopping": true,
52
- "max_length": 300,
53
- "num_beams": 4,
54
- "prefix": "translate English to Romanian: "
55
- }
56
- },
57
- "tie_word_embeddings": false,
58
- "torch_dtype": "float32",
59
- "transformers_version": "4.33.1",
60
- "use_cache": true,
61
- "vocab_size": 32128
62
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-10000/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.33.1"
7
- }
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-10000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc67f5c5dc22f7c375b0c2101d705d270abdc690e52970bb6e6d499ca53cc6a7
3
- size 144545
 
 
 
 
baseline_bs4_top2/checkpoint-10000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d366a12d59545d20daa0dbcde33cbac47216dd084289a717889298622bb0e2ae
3
- size 990480513
 
 
 
 
baseline_bs4_top2/checkpoint-10000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:89428fc2bed9ce2a97559ad926183a8a8fb059a55491935c6cdb5773685812f4
3
- size 14511
 
 
 
 
baseline_bs4_top2/checkpoint-10000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:89744af0d534dd9add5a42ebd997c43178aeb78f0f65e79af8379d8a5c11b73a
3
- size 627
 
 
 
 
baseline_bs4_top2/checkpoint-10000/special_tokens_map.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
107
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-10000/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
 
 
 
baseline_bs4_top2/checkpoint-10000/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
baseline_bs4_top2/checkpoint-10000/tokenizer_config.json DELETED
@@ -1,112 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "clean_up_tokenization_spaces": true,
105
- "eos_token": "</s>",
106
- "extra_ids": 100,
107
- "model_max_length": 512,
108
- "pad_token": "<pad>",
109
- "sp_model_kwargs": {},
110
- "tokenizer_class": "T5Tokenizer",
111
- "unk_token": "<unk>"
112
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-10000/trainer_state.json DELETED
@@ -1,139 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.008527584604298755,
5
- "eval_steps": 500,
6
- "global_step": 10000,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 0.01,
14
- "loss": 1.6449,
15
- "step": 500
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 0.01,
20
- "loss": 1.5651,
21
- "step": 1000
22
- },
23
- {
24
- "epoch": 0.0,
25
- "learning_rate": 0.01,
26
- "loss": 1.5875,
27
- "step": 1500
28
- },
29
- {
30
- "epoch": 0.0,
31
- "learning_rate": 0.01,
32
- "loss": 1.5539,
33
- "step": 2000
34
- },
35
- {
36
- "epoch": 0.0,
37
- "learning_rate": 0.01,
38
- "loss": 1.5524,
39
- "step": 2500
40
- },
41
- {
42
- "epoch": 0.0,
43
- "learning_rate": 0.01,
44
- "loss": 1.5449,
45
- "step": 3000
46
- },
47
- {
48
- "epoch": 0.0,
49
- "learning_rate": 0.01,
50
- "loss": 1.549,
51
- "step": 3500
52
- },
53
- {
54
- "epoch": 0.0,
55
- "learning_rate": 0.01,
56
- "loss": 1.5671,
57
- "step": 4000
58
- },
59
- {
60
- "epoch": 0.0,
61
- "learning_rate": 0.01,
62
- "loss": 1.5251,
63
- "step": 4500
64
- },
65
- {
66
- "epoch": 0.0,
67
- "learning_rate": 0.01,
68
- "loss": 1.5532,
69
- "step": 5000
70
- },
71
- {
72
- "epoch": 0.0,
73
- "learning_rate": 0.01,
74
- "loss": 1.5262,
75
- "step": 5500
76
- },
77
- {
78
- "epoch": 0.01,
79
- "learning_rate": 0.01,
80
- "loss": 1.5306,
81
- "step": 6000
82
- },
83
- {
84
- "epoch": 0.01,
85
- "learning_rate": 0.01,
86
- "loss": 1.5273,
87
- "step": 6500
88
- },
89
- {
90
- "epoch": 0.01,
91
- "learning_rate": 0.01,
92
- "loss": 1.519,
93
- "step": 7000
94
- },
95
- {
96
- "epoch": 0.01,
97
- "learning_rate": 0.01,
98
- "loss": 1.5305,
99
- "step": 7500
100
- },
101
- {
102
- "epoch": 0.01,
103
- "learning_rate": 0.01,
104
- "loss": 1.507,
105
- "step": 8000
106
- },
107
- {
108
- "epoch": 0.01,
109
- "learning_rate": 0.01,
110
- "loss": 1.5255,
111
- "step": 8500
112
- },
113
- {
114
- "epoch": 0.01,
115
- "learning_rate": 0.01,
116
- "loss": 1.5106,
117
- "step": 9000
118
- },
119
- {
120
- "epoch": 0.01,
121
- "learning_rate": 0.01,
122
- "loss": 1.5224,
123
- "step": 9500
124
- },
125
- {
126
- "epoch": 0.01,
127
- "learning_rate": 0.01,
128
- "loss": 1.5241,
129
- "step": 10000
130
- }
131
- ],
132
- "logging_steps": 500,
133
- "max_steps": 20000,
134
- "num_train_epochs": 1,
135
- "save_steps": 10000,
136
- "total_flos": 4.829257277256499e+16,
137
- "trial_name": null,
138
- "trial_params": null
139
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-10000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:af2adb9b227cf0f18c45853aa1db09565dc63c29547902da6f50352111a9a5e7
3
- size 4539
 
 
 
 
baseline_bs4_top2/checkpoint-20000/config.json DELETED
@@ -1,62 +0,0 @@
1
- {
2
- "_name_or_path": "google/flan-t5-base",
3
- "architectures": [
4
- "SoftRelPromptFlanT5"
5
- ],
6
- "classifier_dropout": 0.0,
7
- "d_ff": 2048,
8
- "d_kv": 64,
9
- "d_model": 768,
10
- "decoder_start_token_id": 0,
11
- "dense_act_fn": "gelu_new",
12
- "dropout_rate": 0.1,
13
- "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
- "initializer_factor": 1.0,
16
- "is_encoder_decoder": true,
17
- "is_gated_act": true,
18
- "layer_norm_epsilon": 1e-06,
19
- "model_type": "t5",
20
- "n_positions": 512,
21
- "num_decoder_layers": 12,
22
- "num_heads": 12,
23
- "num_layers": 12,
24
- "output_past": true,
25
- "pad_token_id": 0,
26
- "relative_attention_max_distance": 128,
27
- "relative_attention_num_buckets": 32,
28
- "task_specific_params": {
29
- "summarization": {
30
- "early_stopping": true,
31
- "length_penalty": 2.0,
32
- "max_length": 200,
33
- "min_length": 30,
34
- "no_repeat_ngram_size": 3,
35
- "num_beams": 4,
36
- "prefix": "summarize: "
37
- },
38
- "translation_en_to_de": {
39
- "early_stopping": true,
40
- "max_length": 300,
41
- "num_beams": 4,
42
- "prefix": "translate English to German: "
43
- },
44
- "translation_en_to_fr": {
45
- "early_stopping": true,
46
- "max_length": 300,
47
- "num_beams": 4,
48
- "prefix": "translate English to French: "
49
- },
50
- "translation_en_to_ro": {
51
- "early_stopping": true,
52
- "max_length": 300,
53
- "num_beams": 4,
54
- "prefix": "translate English to Romanian: "
55
- }
56
- },
57
- "tie_word_embeddings": false,
58
- "torch_dtype": "float32",
59
- "transformers_version": "4.33.1",
60
- "use_cache": true,
61
- "vocab_size": 32128
62
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-20000/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "decoder_start_token_id": 0,
4
- "eos_token_id": 1,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.33.1"
7
- }
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-20000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4e916e263fcf89b5d721173eee18cb5b06899e27ec18f26d0a22ed9e2016282
3
- size 144545
 
 
 
 
baseline_bs4_top2/checkpoint-20000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3835ee505ebe813e921bacd0eea2ed25ed36c4ff4e46de9431631321049a3a53
3
- size 990480513
 
 
 
 
baseline_bs4_top2/checkpoint-20000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:973f9fb61da573e339acd801c6477b8cc4497eed865244e05b84a0eedbe74768
3
- size 14511
 
 
 
 
baseline_bs4_top2/checkpoint-20000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3ddfbcd33fc0d81c222807ca3e42cd9654f7e531f573941ed9599b1e07e0373
3
- size 627
 
 
 
 
baseline_bs4_top2/checkpoint-20000/special_tokens_map.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<extra_id_0>",
4
- "<extra_id_1>",
5
- "<extra_id_2>",
6
- "<extra_id_3>",
7
- "<extra_id_4>",
8
- "<extra_id_5>",
9
- "<extra_id_6>",
10
- "<extra_id_7>",
11
- "<extra_id_8>",
12
- "<extra_id_9>",
13
- "<extra_id_10>",
14
- "<extra_id_11>",
15
- "<extra_id_12>",
16
- "<extra_id_13>",
17
- "<extra_id_14>",
18
- "<extra_id_15>",
19
- "<extra_id_16>",
20
- "<extra_id_17>",
21
- "<extra_id_18>",
22
- "<extra_id_19>",
23
- "<extra_id_20>",
24
- "<extra_id_21>",
25
- "<extra_id_22>",
26
- "<extra_id_23>",
27
- "<extra_id_24>",
28
- "<extra_id_25>",
29
- "<extra_id_26>",
30
- "<extra_id_27>",
31
- "<extra_id_28>",
32
- "<extra_id_29>",
33
- "<extra_id_30>",
34
- "<extra_id_31>",
35
- "<extra_id_32>",
36
- "<extra_id_33>",
37
- "<extra_id_34>",
38
- "<extra_id_35>",
39
- "<extra_id_36>",
40
- "<extra_id_37>",
41
- "<extra_id_38>",
42
- "<extra_id_39>",
43
- "<extra_id_40>",
44
- "<extra_id_41>",
45
- "<extra_id_42>",
46
- "<extra_id_43>",
47
- "<extra_id_44>",
48
- "<extra_id_45>",
49
- "<extra_id_46>",
50
- "<extra_id_47>",
51
- "<extra_id_48>",
52
- "<extra_id_49>",
53
- "<extra_id_50>",
54
- "<extra_id_51>",
55
- "<extra_id_52>",
56
- "<extra_id_53>",
57
- "<extra_id_54>",
58
- "<extra_id_55>",
59
- "<extra_id_56>",
60
- "<extra_id_57>",
61
- "<extra_id_58>",
62
- "<extra_id_59>",
63
- "<extra_id_60>",
64
- "<extra_id_61>",
65
- "<extra_id_62>",
66
- "<extra_id_63>",
67
- "<extra_id_64>",
68
- "<extra_id_65>",
69
- "<extra_id_66>",
70
- "<extra_id_67>",
71
- "<extra_id_68>",
72
- "<extra_id_69>",
73
- "<extra_id_70>",
74
- "<extra_id_71>",
75
- "<extra_id_72>",
76
- "<extra_id_73>",
77
- "<extra_id_74>",
78
- "<extra_id_75>",
79
- "<extra_id_76>",
80
- "<extra_id_77>",
81
- "<extra_id_78>",
82
- "<extra_id_79>",
83
- "<extra_id_80>",
84
- "<extra_id_81>",
85
- "<extra_id_82>",
86
- "<extra_id_83>",
87
- "<extra_id_84>",
88
- "<extra_id_85>",
89
- "<extra_id_86>",
90
- "<extra_id_87>",
91
- "<extra_id_88>",
92
- "<extra_id_89>",
93
- "<extra_id_90>",
94
- "<extra_id_91>",
95
- "<extra_id_92>",
96
- "<extra_id_93>",
97
- "<extra_id_94>",
98
- "<extra_id_95>",
99
- "<extra_id_96>",
100
- "<extra_id_97>",
101
- "<extra_id_98>",
102
- "<extra_id_99>"
103
- ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
107
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baseline_bs4_top2/checkpoint-20000/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656