Seonghyeon Lee committed
Commit d0d9004
Parent(s): 43df447
feat: upload checkpoints
This view is limited to 50 files because it contains too many changes.
- 20211029_101219/answers-students.wa +0 -0
- 20211029_101219/answers-students.wa.untrained +0 -0
- 20211029_101219/checkpoint-2000/optimizer.pt +3 -0
- 20211029_101219/checkpoint-2000/pytorch_model.bin +3 -0
- 20211029_101219/checkpoint-2000/rng_state.pth +3 -0
- 20211029_101219/checkpoint-2000/scaler.pt +3 -0
- 20211029_101219/checkpoint-2000/scheduler.pt +3 -0
- 20211029_101219/checkpoint-2000/special_tokens_map.json +1 -0
- 20211029_101219/checkpoint-2000/tokenizer_config.json +1 -0
- 20211029_101219/checkpoint-2000/trainer_state.json +80 -0
- 20211029_101219/checkpoint-2000/training_args.bin +3 -0
- 20211029_101219/checkpoint-2000/vocab.txt +0 -0
- 20211029_101219/checkpoint-6250/optimizer.pt +3 -0
- 20211029_101219/checkpoint-6250/pytorch_model.bin +3 -0
- 20211029_101219/checkpoint-6250/rng_state.pth +3 -0
- 20211029_101219/checkpoint-6250/scaler.pt +3 -0
- 20211029_101219/checkpoint-6250/scheduler.pt +3 -0
- 20211029_101219/checkpoint-6250/special_tokens_map.json +1 -0
- 20211029_101219/checkpoint-6250/tokenizer_config.json +1 -0
- 20211029_101219/checkpoint-6250/trainer_state.json +213 -0
- 20211029_101219/checkpoint-6250/training_args.bin +3 -0
- 20211029_101219/checkpoint-6250/vocab.txt +0 -0
- 20211029_101219/data_args.json +1 -0
- 20211029_101219/eval_results.txt +1 -0
- 20211029_101219/headlines.wa +0 -0
- 20211029_101219/headlines.wa.untrained +0 -0
- 20211029_101219/images.wa +0 -0
- 20211029_101219/images.wa.untrained +0 -0
- 20211029_101219/model_args.json +1 -0
- 20211029_101219/train_results.txt +5 -0
- 20211029_101219/training_args.json +1 -0
- 20211030_161510/data_args.json +1 -0
- 20211030_161510/model_args.json +1 -0
- 20211030_161510/training_args.json +1 -0
- 20211030_161612/answers-students.wa +0 -0
- 20211030_161612/answers-students.wa.untrained +0 -0
- 20211030_161612/checkpoint-2000/optimizer.pt +3 -0
- 20211030_161612/checkpoint-2000/pytorch_model.bin +3 -0
- 20211030_161612/checkpoint-2000/rng_state.pth +3 -0
- 20211030_161612/checkpoint-2000/scaler.pt +3 -0
- 20211030_161612/checkpoint-2000/scheduler.pt +3 -0
- 20211030_161612/checkpoint-2000/special_tokens_map.json +1 -0
- 20211030_161612/checkpoint-2000/tokenizer_config.json +1 -0
- 20211030_161612/checkpoint-2000/trainer_state.json +80 -0
- 20211030_161612/checkpoint-2000/training_args.bin +3 -0
- 20211030_161612/checkpoint-2000/vocab.txt +0 -0
- 20211030_161612/checkpoint-6250/optimizer.pt +3 -0
- 20211030_161612/checkpoint-6250/pytorch_model.bin +3 -0
- 20211030_161612/checkpoint-6250/rng_state.pth +3 -0
- 20211030_161612/checkpoint-6250/scaler.pt +3 -0
20211029_101219/answers-students.wa
ADDED
The diff for this file is too large to render.
20211029_101219/answers-students.wa.untrained
ADDED
The diff for this file is too large to render.
20211029_101219/checkpoint-2000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2ec3f0d2b7c3b5a65a416c246e16d59d9af3fd850b48cc3db8f5d5ca10e7747
+size 875973285
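The three lines above are a Git LFS pointer, not the checkpoint itself: the repository records only the SHA-256 and byte size of the real optimizer.pt. A minimal sketch of checking a fetched file against its pointer, assuming the binary has already been pulled (e.g. via git lfs pull); verify_lfs_pointer is a hypothetical helper, not part of this repository:

import hashlib
import os

def verify_lfs_pointer(path, expected_oid, expected_size):
    # Compare the on-disk size and SHA-256 with the values in the LFS pointer.
    if os.path.getsize(path) != expected_size:
        return False
    sha = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid

# Values taken from the pointer file above.
ok = verify_lfs_pointer(
    "20211029_101219/checkpoint-2000/optimizer.pt",
    "e2ec3f0d2b7c3b5a65a416c246e16d59d9af3fd850b48cc3db8f5d5ca10e7747",
    875973285,
)
print("optimizer.pt matches its LFS pointer:", ok)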
20211029_101219/checkpoint-2000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9118661b7a1faa0ea976e7a43c52c17a06c19e5a040b890f51544901ac932669
+size 440387437
20211029_101219/checkpoint-2000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2f52cc3028827f4c99031990038e86f715b12cdf8538aabc1bac0dee99261b0
+size 14503
20211029_101219/checkpoint-2000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3914f16c24b1fd0d20aa37bebdd55cb46873ae864e0847fd6dcc5768e6d7497
+size 559
20211029_101219/checkpoint-2000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:512941103bf1e720143f9a916d47d84f8a4565c8eb359bc4b6aa9d12fdcb5f07
+size 623
20211029_101219/checkpoint-2000/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
20211029_101219/checkpoint-2000/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/home/sh0416/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
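Because the checkpoint directory ships vocab.txt, tokenizer_config.json and special_tokens_map.json, the tokenizer can be restored straight from it. A minimal sketch, assuming the transformers library is installed and the files have been fetched locally:

from transformers import BertTokenizer

# The slow BertTokenizer only needs vocab.txt plus the two config files
# committed in this checkpoint directory.
tokenizer = BertTokenizer.from_pretrained("20211029_101219/checkpoint-2000")
print(tokenizer.tokenize("a quick sanity check"))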
20211029_101219/checkpoint-2000/trainer_state.json
ADDED
@@ -0,0 +1,80 @@
+{
+  "best_metric": 0.8695299721915639,
+  "best_model_checkpoint": "/home/sh0416/checkpoints/20211029_101219/checkpoint-2000",
+  "epoch": 0.9289363678588016,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.12,
+      "eval_stsb_spearman": 0.8607327167766351,
+      "step": 250
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 3.691593125870878e-05,
+      "loss": 0.6096,
+      "step": 500
+    },
+    {
+      "epoch": 0.23,
+      "eval_stsb_spearman": 0.8686592794214189,
+      "step": 500
+    },
+    {
+      "epoch": 0.35,
+      "eval_stsb_spearman": 0.859845431717547,
+      "step": 750
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 3.381947669917944e-05,
+      "loss": 0.4635,
+      "step": 1000
+    },
+    {
+      "epoch": 0.46,
+      "eval_stsb_spearman": 0.8640662264378054,
+      "step": 1000
+    },
+    {
+      "epoch": 0.58,
+      "eval_stsb_spearman": 0.8658685932167778,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 3.07230221396501e-05,
+      "loss": 0.4223,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7,
+      "eval_stsb_spearman": 0.8684476529412299,
+      "step": 1500
+    },
+    {
+      "epoch": 0.81,
+      "eval_stsb_spearman": 0.8683877027220283,
+      "step": 1750
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 2.7626567580120764e-05,
+      "loss": 0.4018,
+      "step": 2000
+    },
+    {
+      "epoch": 0.93,
+      "eval_stsb_spearman": 0.8695299721915639,
+      "step": 2000
+    }
+  ],
+  "max_steps": 6459,
+  "num_train_epochs": 3,
+  "total_flos": 0.0,
+  "trial_name": null,
+  "trial_params": null
+}
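trainer_state.json is plain JSON, so the evaluation trajectory can be recovered without transformers at all. A small sketch that lists the eval_stsb_spearman history and the best score recorded above:

import json

with open("20211029_101219/checkpoint-2000/trainer_state.json") as f:
    state = json.load(f)

# Keep only the log entries that carry an evaluation score.
evals = [(e["step"], e["eval_stsb_spearman"])
         for e in state["log_history"] if "eval_stsb_spearman" in e]
for step, score in evals:
    print(f"step {step:5d}  stsb_spearman {score:.4f}")
print("best:", state["best_metric"], "at", state["best_model_checkpoint"])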
20211029_101219/checkpoint-2000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f21801a0f477016c386d7845cd97a476e091b21f5e754e8d2b4a45d97a33731b
+size 2735
20211029_101219/checkpoint-2000/vocab.txt
ADDED
The diff for this file is too large to render.
20211029_101219/checkpoint-6250/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c15b19a12e17a8dd82530b2358f9f1db502d9d4774cbb17b59c9fc81202b6a8
+size 875973285
20211029_101219/checkpoint-6250/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4da87d8301f29c03be0b1adf174e50c1b1448248f99bda442ff4cb3a0aa6bafe
+size 440387437
20211029_101219/checkpoint-6250/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b428c2f579b4b0f614f175c697132b1e59f00400df624235d7967af5f7538b9
+size 14503
20211029_101219/checkpoint-6250/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e6be2d9047bd8c9b6bd066381ccb3d9565dd7267a91985c6cdffa400973ba9
+size 559
20211029_101219/checkpoint-6250/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43193f69601b44b72d776af0fbc138141b555844e67a165b109ba933b2b63767
+size 623
20211029_101219/checkpoint-6250/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
20211029_101219/checkpoint-6250/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/home/sh0416/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
20211029_101219/checkpoint-6250/trainer_state.json
ADDED
@@ -0,0 +1,213 @@
+{
+  "best_metric": 0.8695299721915639,
+  "best_model_checkpoint": "/home/sh0416/checkpoints/20211029_101219/checkpoint-2000",
+  "epoch": 2.9029261495587555,
+  "global_step": 6250,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.12,
+      "eval_stsb_spearman": 0.8607327167766351,
+      "step": 250
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 3.691593125870878e-05,
+      "loss": 0.6096,
+      "step": 500
+    },
+    {
+      "epoch": 0.23,
+      "eval_stsb_spearman": 0.8686592794214189,
+      "step": 500
+    },
+    {
+      "epoch": 0.35,
+      "eval_stsb_spearman": 0.859845431717547,
+      "step": 750
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 3.381947669917944e-05,
+      "loss": 0.4635,
+      "step": 1000
+    },
+    {
+      "epoch": 0.46,
+      "eval_stsb_spearman": 0.8640662264378054,
+      "step": 1000
+    },
+    {
+      "epoch": 0.58,
+      "eval_stsb_spearman": 0.8658685932167778,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 3.07230221396501e-05,
+      "loss": 0.4223,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7,
+      "eval_stsb_spearman": 0.8684476529412299,
+      "step": 1500
+    },
+    {
+      "epoch": 0.81,
+      "eval_stsb_spearman": 0.8683877027220283,
+      "step": 1750
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 2.7626567580120764e-05,
+      "loss": 0.4018,
+      "step": 2000
+    },
+    {
+      "epoch": 0.93,
+      "eval_stsb_spearman": 0.8695299721915639,
+      "step": 2000
+    },
+    {
+      "epoch": 1.05,
+      "eval_stsb_spearman": 0.8641507442895762,
+      "step": 2250
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 2.4530113020591425e-05,
+      "loss": 0.329,
+      "step": 2500
+    },
+    {
+      "epoch": 1.16,
+      "eval_stsb_spearman": 0.867926581408182,
+      "step": 2500
+    },
+    {
+      "epoch": 1.28,
+      "eval_stsb_spearman": 0.8668693608104431,
+      "step": 2750
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 2.1433658461062086e-05,
+      "loss": 0.2898,
+      "step": 3000
+    },
+    {
+      "epoch": 1.39,
+      "eval_stsb_spearman": 0.8683401493085128,
+      "step": 3000
+    },
+    {
+      "epoch": 1.51,
+      "eval_stsb_spearman": 0.8616608403167029,
+      "step": 3250
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 1.8337203901532747e-05,
+      "loss": 0.292,
+      "step": 3500
+    },
+    {
+      "epoch": 1.63,
+      "eval_stsb_spearman": 0.8661633097318306,
+      "step": 3500
+    },
+    {
+      "epoch": 1.74,
+      "eval_stsb_spearman": 0.8662656674535325,
+      "step": 3750
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 1.5240749342003407e-05,
+      "loss": 0.2826,
+      "step": 4000
+    },
+    {
+      "epoch": 1.86,
+      "eval_stsb_spearman": 0.8662364297692312,
+      "step": 4000
+    },
+    {
+      "epoch": 1.97,
+      "eval_stsb_spearman": 0.8647880098527821,
+      "step": 4250
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 1.2144294782474068e-05,
+      "loss": 0.251,
+      "step": 4500
+    },
+    {
+      "epoch": 2.09,
+      "eval_stsb_spearman": 0.8644350558731776,
+      "step": 4500
+    },
+    {
+      "epoch": 2.21,
+      "eval_stsb_spearman": 0.8650629299708443,
+      "step": 4750
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 9.04784022294473e-06,
+      "loss": 0.2152,
+      "step": 5000
+    },
+    {
+      "epoch": 2.32,
+      "eval_stsb_spearman": 0.8626686418459087,
+      "step": 5000
+    },
+    {
+      "epoch": 2.44,
+      "eval_stsb_spearman": 0.8643429650624574,
+      "step": 5250
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 5.95138566341539e-06,
+      "loss": 0.2165,
+      "step": 5500
+    },
+    {
+      "epoch": 2.55,
+      "eval_stsb_spearman": 0.8630366314230514,
+      "step": 5500
+    },
+    {
+      "epoch": 2.67,
+      "eval_stsb_spearman": 0.8629383082790121,
+      "step": 5750
+    },
+    {
+      "epoch": 2.79,
+      "learning_rate": 2.8549311038860505e-06,
+      "loss": 0.2176,
+      "step": 6000
+    },
+    {
+      "epoch": 2.79,
+      "eval_stsb_spearman": 0.862915863423685,
+      "step": 6000
+    },
+    {
+      "epoch": 2.9,
+      "eval_stsb_spearman": 0.8627168589609486,
+      "step": 6250
+    }
+  ],
+  "max_steps": 6459,
+  "num_train_epochs": 3,
+  "total_flos": 0.0,
+  "trial_name": null,
+  "trial_params": null
+}
20211029_101219/checkpoint-6250/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f21801a0f477016c386d7845cd97a476e091b21f5e754e8d2b4a45d97a33731b
+size 2735
20211029_101219/checkpoint-6250/vocab.txt
ADDED
The diff for this file is too large to render.
20211029_101219/data_args.json
ADDED
@@ -0,0 +1 @@
+{"data_type": "simcse-nli", "train_file": "/nas/home/sh0416/data/simcse/nli_for_simcse.csv", "max_seq_length": 32, "add_typo_corpus": false, "typo_corpus_filepath": null, "dup_rate": 0.08}
20211029_101219/eval_results.txt
ADDED
@@ -0,0 +1 @@
+0.7523,0.8506,0.8099,0.8626,0.8150,0.8521,0.8049,0.8695
20211029_101219/headlines.wa
ADDED
The diff for this file is too large to render.
20211029_101219/headlines.wa.untrained
ADDED
The diff for this file is too large to render.
20211029_101219/images.wa
ADDED
The diff for this file is too large to render.
20211029_101219/images.wa.untrained
ADDED
The diff for this file is too large to render.
20211029_101219/model_args.json
ADDED
@@ -0,0 +1 @@
+{"model_name_or_path": "bert-base-uncased", "loss_type": "rwmdcse", "temp": 0.05, "hidden_dropout_prob": 0.1, "mlp_only_train": true, "coeff_mlm": 0.1, "loss_rwmd": false, "layer_idx": 12}
20211029_101219/train_results.txt
ADDED
@@ -0,0 +1,5 @@
+epoch = 3.0
+train_loss = 0.3241952664897545
+train_runtime = 7201.1171
+train_samples_per_second = 114.816
+train_steps_per_second = 0.897
20211029_101219/training_args.json
ADDED
@@ -0,0 +1 @@
+{"output_dir": "/home/sh0416/checkpoints/20211029_101219", "overwrite_output_dir": false, "do_train": true, "do_eval": true, "do_predict": false, "evaluation_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 128, "per_device_eval_batch_size": 128, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "learning_rate": 4e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": -1, "lr_scheduler_type": "linear", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": -1, "log_level_replica": -1, "log_on_each_node": true, "logging_dir": "/home/sh0416/checkpoints/20211029_101219/runs/Oct29_10-12-20_jarvis", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "save_strategy": "steps", "save_steps": 500, "save_total_limit": 1, "save_on_each_node": false, "no_cuda": false, "seed": 3, "fp16": true, "fp16_opt_level": "O1", "fp16_backend": "auto", "fp16_full_eval": false, "local_rank": -1, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": true, "eval_steps": 250, "dataloader_num_workers": 0, "past_index": -1, "run_name": "/home/sh0416/checkpoints/20211029_101219", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": true, "metric_for_best_model": "stsb_spearman", "greater_is_better": true, "ignore_data_skip": false, "sharded_ddp": [], "deepspeed": null, "label_smoothing_factor": 0.0, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "push_to_hub_model_id": "20211029_101219", "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "eval_file": "/nas/home/sh0416/data/"}
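training_args.json is a flat JSON dump of the run's training arguments, so the key hyperparameters can be read with the standard library alone, and the throughput recorded in train_results.txt above is consistent with them (114.816 samples/s divided by 0.897 steps/s is 128, the per-device batch size on a single GPU with no gradient accumulation). A small sketch:

import json

with open("20211029_101219/training_args.json") as f:
    args = json.load(f)

# Print a handful of the hyperparameters recorded for this run.
for key in ("learning_rate", "per_device_train_batch_size",
            "num_train_epochs", "fp16", "seed", "metric_for_best_model"):
    print(f"{key} = {args[key]}")

# Cross-check against train_results.txt: samples/s divided by steps/s
# approximates the effective batch size.
print("effective batch size ~", 114.816 / 0.897)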
20211030_161510/data_args.json
ADDED
@@ -0,0 +1 @@
+{"data_type": "simcse-nli", "train_file": "/nas/home/sh0416/data/simcse/nli_for_simcse.csv", "max_seq_length": 32, "add_typo_corpus": false, "typo_corpus_filepath": null, "dup_rate": 0.08}
20211030_161510/model_args.json
ADDED
@@ -0,0 +1 @@
+{"model_name_or_path": "bert-base-uncased", "loss_type": "simcse-avg", "temp": 0.05, "hidden_dropout_prob": 0.1, "mlp_only_train": true, "coeff_mlm": 0.1, "loss_rwmd": false, "layer_idx": 12}
20211030_161510/training_args.json
ADDED
@@ -0,0 +1 @@
+{"output_dir": "/home/sh0416/checkpoints/20211030_161510", "overwrite_output_dir": false, "do_train": true, "do_eval": true, "do_predict": false, "evaluation_strategy": "steps", "prediction_loss_only": false, "per_device_train_batch_size": 128, "per_device_eval_batch_size": 128, "per_gpu_train_batch_size": null, "per_gpu_eval_batch_size": null, "gradient_accumulation_steps": 1, "eval_accumulation_steps": null, "learning_rate": 4e-05, "weight_decay": 0.0, "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_epsilon": 1e-08, "max_grad_norm": 1.0, "num_train_epochs": 3.0, "max_steps": -1, "lr_scheduler_type": "linear", "warmup_ratio": 0.0, "warmup_steps": 0, "log_level": -1, "log_level_replica": -1, "log_on_each_node": true, "logging_dir": "/home/sh0416/checkpoints/20211030_161510/runs/Oct30_16-15-10_jarvis", "logging_strategy": "steps", "logging_first_step": false, "logging_steps": 500, "save_strategy": "steps", "save_steps": 500, "save_total_limit": 1, "save_on_each_node": false, "no_cuda": false, "seed": 3, "fp16": true, "fp16_opt_level": "O1", "fp16_backend": "auto", "fp16_full_eval": false, "local_rank": -1, "tpu_num_cores": null, "tpu_metrics_debug": false, "debug": [], "dataloader_drop_last": true, "eval_steps": 250, "dataloader_num_workers": 0, "past_index": -1, "run_name": "/home/sh0416/checkpoints/20211030_161510", "disable_tqdm": false, "remove_unused_columns": true, "label_names": null, "load_best_model_at_end": true, "metric_for_best_model": "stsb_spearman", "greater_is_better": true, "ignore_data_skip": false, "sharded_ddp": [], "deepspeed": null, "label_smoothing_factor": 0.0, "adafactor": false, "group_by_length": false, "length_column_name": "length", "report_to": [], "ddp_find_unused_parameters": null, "dataloader_pin_memory": true, "skip_memory_metrics": true, "use_legacy_prediction_loop": false, "push_to_hub": false, "resume_from_checkpoint": null, "push_to_hub_model_id": "20211030_161510", "push_to_hub_organization": null, "push_to_hub_token": null, "_n_gpu": 1, "mp_parameters": "", "eval_file": "/nas/home/sh0416/data/"}
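Judging from the committed configs, the 20211030_161510 run reuses the data arguments of 20211029_101219 and, apart from run-specific output paths, the same training arguments; the substantive change is loss_type, which switches from "rwmdcse" to "simcse-avg". A quick sketch that diffs the two model_args.json files to confirm this:

import json

def load(path):
    with open(path) as f:
        return json.load(f)

a = load("20211029_101219/model_args.json")
b = load("20211030_161510/model_args.json")

# Print every key whose value differs between the two runs.
for key in sorted(set(a) | set(b)):
    if a.get(key) != b.get(key):
        print(f"{key}: {a.get(key)!r} -> {b.get(key)!r}")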
20211030_161612/answers-students.wa
ADDED
The diff for this file is too large to render.
20211030_161612/answers-students.wa.untrained
ADDED
The diff for this file is too large to render.
20211030_161612/checkpoint-2000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55d092fb309a1dbce28c043a5044613fb9e3ad5643787356445f6ea4a27813ce
+size 875973285
20211030_161612/checkpoint-2000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be04aec488d580c8c1134e1602916cd4240244d7c4bf842f50e77028e62df995
+size 440387309
20211030_161612/checkpoint-2000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2f52cc3028827f4c99031990038e86f715b12cdf8538aabc1bac0dee99261b0
+size 14503
20211030_161612/checkpoint-2000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f23db1abf0b77198c6e3a9212a16fca37cbfe2158135986751bf408c3cec5d63
+size 559
20211030_161612/checkpoint-2000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98c347fc75a377cadf6d4202c418493496db9df9270b4ad415666145a83227b0
+size 623
20211030_161612/checkpoint-2000/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
20211030_161612/checkpoint-2000/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"do_lower_case": true, "do_basic_tokenize": true, "never_split": null, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "tokenizer_file": "/home/sh0416/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4", "name_or_path": "bert-base-uncased", "tokenizer_class": "BertTokenizer"}
20211030_161612/checkpoint-2000/trainer_state.json
ADDED
@@ -0,0 +1,80 @@
+{
+  "best_metric": 0.8625848570907281,
+  "best_model_checkpoint": "/home/sh0416/checkpoints/20211030_161612/checkpoint-2000",
+  "epoch": 0.9289363678588016,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.12,
+      "eval_stsb_spearman": 0.849104410089273,
+      "step": 250
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 3.6922124167827844e-05,
+      "loss": 0.6558,
+      "step": 500
+    },
+    {
+      "epoch": 0.23,
+      "eval_stsb_spearman": 0.8549645102083819,
+      "step": 500
+    },
+    {
+      "epoch": 0.35,
+      "eval_stsb_spearman": 0.8493755409241399,
+      "step": 750
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 3.38256696082985e-05,
+      "loss": 0.5061,
+      "step": 1000
+    },
+    {
+      "epoch": 0.46,
+      "eval_stsb_spearman": 0.8558743382367817,
+      "step": 1000
+    },
+    {
+      "epoch": 0.58,
+      "eval_stsb_spearman": 0.8579781749671418,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 3.072921504876916e-05,
+      "loss": 0.4618,
+      "step": 1500
+    },
+    {
+      "epoch": 0.7,
+      "eval_stsb_spearman": 0.8551950210432068,
+      "step": 1500
+    },
+    {
+      "epoch": 0.81,
+      "eval_stsb_spearman": 0.8575288731834817,
+      "step": 1750
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 2.763276048923982e-05,
+      "loss": 0.4405,
+      "step": 2000
+    },
+    {
+      "epoch": 0.93,
+      "eval_stsb_spearman": 0.8625848570907281,
+      "step": 2000
+    }
+  ],
+  "max_steps": 6459,
+  "num_train_epochs": 3,
+  "total_flos": 0.0,
+  "trial_name": null,
+  "trial_params": null
+}
20211030_161612/checkpoint-2000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa120f507d239c6af97ffb94ef4194b95342c4b09bbeb6681efc143dc43f8c6f
+size 2735
20211030_161612/checkpoint-2000/vocab.txt
ADDED
The diff for this file is too large to render.
20211030_161612/checkpoint-6250/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78b4e784b193c65a0c3bffd4faef2628130b7ee68800d57a7066b4ca689645cd
+size 875973285
20211030_161612/checkpoint-6250/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da71a645f28290edf4434b9c7cd1a8235d702f18dc8c9eaf3c521f87a127da30
+size 440387309
20211030_161612/checkpoint-6250/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b428c2f579b4b0f614f175c697132b1e59f00400df624235d7967af5f7538b9
+size 14503
20211030_161612/checkpoint-6250/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53a7cbf97da840f6ef5b40dda02ec9e029260fa5b4ee7e5b95075489b65e35e9
+size 559