venkatarajendra
committed on
Commit
•
95625a5
1
Parent(s):
cc66872
Training in progress, step 500
Browse files
- .gitignore +1 -0
- checkpoint-500/config.json +1 -1
- checkpoint-500/optimizer.pt +2 -2
- checkpoint-500/rng_state.pth +1 -1
- checkpoint-500/scheduler.pt +1 -1
- checkpoint-500/tokenizer.json +14 -2
- checkpoint-500/tokenizer_config.json +2 -43
- checkpoint-500/trainer_state.json +8 -15
- checkpoint-500/training_args.bin +2 -2
- config.json +1 -1
- pytorch_model.bin +3 -0
- tokenizer.json +14 -2
- tokenizer_config.json +2 -43
- training_args.bin +2 -2
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
checkpoint-*/
|
checkpoint-500/config.json
CHANGED
@@ -36,6 +36,6 @@
|
|
36 |
"sinusoidal_pos_embds": false,
|
37 |
"tie_weights_": true,
|
38 |
"torch_dtype": "float32",
|
39 |
-
"transformers_version": "4.
|
40 |
"vocab_size": 30522
|
41 |
}
|
|
|
36 |
"sinusoidal_pos_embds": false,
|
37 |
"tie_weights_": true,
|
38 |
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.26.1",
|
40 |
"vocab_size": 30522
|
41 |
}
|
checkpoint-500/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b15638192ed1ab5f901937fcdeff09bdaff65bbf9df84d41383f10b37649104f
|
3 |
+
size 535726074
|
checkpoint-500/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2636a72cc04b1941d8d9563a63e832834ffd3f31620a26415d097a7583e6f101
|
3 |
size 14244
|
checkpoint-500/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f29365aa09523198baebebe95ffb09ad5f00205c944d1df89a9000244c3bc23
|
3 |
size 1064
|
checkpoint-500/tokenizer.json
CHANGED
@@ -1,7 +1,19 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 0,
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": {
|
4 |
+
"direction": "Right",
|
5 |
+
"max_length": 512,
|
6 |
+
"strategy": "LongestFirst",
|
7 |
+
"stride": 0
|
8 |
+
},
|
9 |
+
"padding": {
|
10 |
+
"strategy": "BatchLongest",
|
11 |
+
"direction": "Right",
|
12 |
+
"pad_to_multiple_of": null,
|
13 |
+
"pad_id": 0,
|
14 |
+
"pad_type_id": 0,
|
15 |
+
"pad_token": "[PAD]"
|
16 |
+
},
|
17 |
"added_tokens": [
|
18 |
{
|
19 |
"id": 0,
|
checkpoint-500/tokenizer_config.json
CHANGED
@@ -1,53 +1,12 @@
|
|
1 |
{
|
2 |
-
"added_tokens_decoder": {
|
3 |
-
"0": {
|
4 |
-
"content": "[PAD]",
|
5 |
-
"lstrip": false,
|
6 |
-
"normalized": false,
|
7 |
-
"rstrip": false,
|
8 |
-
"single_word": false,
|
9 |
-
"special": true
|
10 |
-
},
|
11 |
-
"100": {
|
12 |
-
"content": "[UNK]",
|
13 |
-
"lstrip": false,
|
14 |
-
"normalized": false,
|
15 |
-
"rstrip": false,
|
16 |
-
"single_word": false,
|
17 |
-
"special": true
|
18 |
-
},
|
19 |
-
"101": {
|
20 |
-
"content": "[CLS]",
|
21 |
-
"lstrip": false,
|
22 |
-
"normalized": false,
|
23 |
-
"rstrip": false,
|
24 |
-
"single_word": false,
|
25 |
-
"special": true
|
26 |
-
},
|
27 |
-
"102": {
|
28 |
-
"content": "[SEP]",
|
29 |
-
"lstrip": false,
|
30 |
-
"normalized": false,
|
31 |
-
"rstrip": false,
|
32 |
-
"single_word": false,
|
33 |
-
"special": true
|
34 |
-
},
|
35 |
-
"103": {
|
36 |
-
"content": "[MASK]",
|
37 |
-
"lstrip": false,
|
38 |
-
"normalized": false,
|
39 |
-
"rstrip": false,
|
40 |
-
"single_word": false,
|
41 |
-
"special": true
|
42 |
-
}
|
43 |
-
},
|
44 |
-
"clean_up_tokenization_spaces": true,
|
45 |
"cls_token": "[CLS]",
|
46 |
"do_lower_case": true,
|
47 |
"mask_token": "[MASK]",
|
48 |
"model_max_length": 512,
|
|
|
49 |
"pad_token": "[PAD]",
|
50 |
"sep_token": "[SEP]",
|
|
|
51 |
"strip_accents": null,
|
52 |
"tokenize_chinese_chars": true,
|
53 |
"tokenizer_class": "DistilBertTokenizer",
|
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"cls_token": "[CLS]",
|
3 |
"do_lower_case": true,
|
4 |
"mask_token": "[MASK]",
|
5 |
"model_max_length": 512,
|
6 |
+
"name_or_path": "distilbert-base-uncased",
|
7 |
"pad_token": "[PAD]",
|
8 |
"sep_token": "[SEP]",
|
9 |
+
"special_tokens_map_file": null,
|
10 |
"strip_accents": null,
|
11 |
"tokenize_chinese_chars": true,
|
12 |
"tokenizer_class": "DistilBertTokenizer",
|
checkpoint-500/trainer_state.json
CHANGED
@@ -2,7 +2,6 @@
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 2.0,
|
5 |
-
"eval_steps": 500,
|
6 |
"global_step": 500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
@@ -10,36 +9,30 @@
|
|
10 |
"log_history": [
|
11 |
{
|
12 |
"epoch": 1.0,
|
13 |
-
"grad_norm": 4.321141242980957,
|
14 |
"learning_rate": 1e-05,
|
15 |
-
"loss": 0.
|
16 |
"step": 250
|
17 |
},
|
18 |
{
|
19 |
"epoch": 1.0,
|
20 |
-
"eval_accuracy": 0.
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_runtime": 2.
|
24 |
-
"eval_samples_per_second":
|
25 |
-
"eval_steps_per_second":
|
26 |
"step": 250
|
27 |
},
|
28 |
{
|
29 |
"epoch": 2.0,
|
30 |
-
"grad_norm": 6.54923152923584,
|
31 |
"learning_rate": 0.0,
|
32 |
-
"loss": 0.
|
33 |
"step": 500
|
34 |
}
|
35 |
],
|
36 |
-
"logging_steps": 250,
|
37 |
"max_steps": 500,
|
38 |
-
"num_input_tokens_seen": 0,
|
39 |
"num_train_epochs": 2,
|
40 |
-
"save_steps": 500,
|
41 |
"total_flos": 720342861696000.0,
|
42 |
-
"train_batch_size": 64,
|
43 |
"trial_name": null,
|
44 |
"trial_params": null
|
45 |
}
|
|
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
"epoch": 2.0,
|
|
|
5 |
"global_step": 500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
|
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 1.0,
|
|
|
12 |
"learning_rate": 1e-05,
|
13 |
+
"loss": 0.8062,
|
14 |
"step": 250
|
15 |
},
|
16 |
{
|
17 |
"epoch": 1.0,
|
18 |
+
"eval_accuracy": 0.9115,
|
19 |
+
"eval_f1": 0.9100250703245248,
|
20 |
+
"eval_loss": 0.30541306734085083,
|
21 |
+
"eval_runtime": 2.0854,
|
22 |
+
"eval_samples_per_second": 959.028,
|
23 |
+
"eval_steps_per_second": 15.344,
|
24 |
"step": 250
|
25 |
},
|
26 |
{
|
27 |
"epoch": 2.0,
|
|
|
28 |
"learning_rate": 0.0,
|
29 |
+
"loss": 0.2399,
|
30 |
"step": 500
|
31 |
}
|
32 |
],
|
|
|
33 |
"max_steps": 500,
|
|
|
34 |
"num_train_epochs": 2,
|
|
|
35 |
"total_flos": 720342861696000.0,
|
|
|
36 |
"trial_name": null,
|
37 |
"trial_params": null
|
38 |
}
|
checkpoint-500/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8aab1ebd59775323e68a4c1858081f20377944583fade8f5ddcd8279ba9773dd
|
3 |
+
size 3960
|
config.json
CHANGED
@@ -36,6 +36,6 @@
|
|
36 |
"sinusoidal_pos_embds": false,
|
37 |
"tie_weights_": true,
|
38 |
"torch_dtype": "float32",
|
39 |
-
"transformers_version": "4.
|
40 |
"vocab_size": 30522
|
41 |
}
|
|
|
36 |
"sinusoidal_pos_embds": false,
|
37 |
"tie_weights_": true,
|
38 |
"torch_dtype": "float32",
|
39 |
+
"transformers_version": "4.26.1",
|
40 |
"vocab_size": 30522
|
41 |
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d90219563e1b25db874c968af86efcc8444a0dfc2e9225c2ae1d2e860a9cccfd
|
3 |
+
size 267868266
|
tokenizer.json
CHANGED
@@ -1,7 +1,19 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 0,
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": {
|
4 |
+
"direction": "Right",
|
5 |
+
"max_length": 512,
|
6 |
+
"strategy": "LongestFirst",
|
7 |
+
"stride": 0
|
8 |
+
},
|
9 |
+
"padding": {
|
10 |
+
"strategy": "BatchLongest",
|
11 |
+
"direction": "Right",
|
12 |
+
"pad_to_multiple_of": null,
|
13 |
+
"pad_id": 0,
|
14 |
+
"pad_type_id": 0,
|
15 |
+
"pad_token": "[PAD]"
|
16 |
+
},
|
17 |
"added_tokens": [
|
18 |
{
|
19 |
"id": 0,
|
tokenizer_config.json
CHANGED
@@ -1,53 +1,12 @@
|
|
1 |
{
|
2 |
-
"added_tokens_decoder": {
|
3 |
-
"0": {
|
4 |
-
"content": "[PAD]",
|
5 |
-
"lstrip": false,
|
6 |
-
"normalized": false,
|
7 |
-
"rstrip": false,
|
8 |
-
"single_word": false,
|
9 |
-
"special": true
|
10 |
-
},
|
11 |
-
"100": {
|
12 |
-
"content": "[UNK]",
|
13 |
-
"lstrip": false,
|
14 |
-
"normalized": false,
|
15 |
-
"rstrip": false,
|
16 |
-
"single_word": false,
|
17 |
-
"special": true
|
18 |
-
},
|
19 |
-
"101": {
|
20 |
-
"content": "[CLS]",
|
21 |
-
"lstrip": false,
|
22 |
-
"normalized": false,
|
23 |
-
"rstrip": false,
|
24 |
-
"single_word": false,
|
25 |
-
"special": true
|
26 |
-
},
|
27 |
-
"102": {
|
28 |
-
"content": "[SEP]",
|
29 |
-
"lstrip": false,
|
30 |
-
"normalized": false,
|
31 |
-
"rstrip": false,
|
32 |
-
"single_word": false,
|
33 |
-
"special": true
|
34 |
-
},
|
35 |
-
"103": {
|
36 |
-
"content": "[MASK]",
|
37 |
-
"lstrip": false,
|
38 |
-
"normalized": false,
|
39 |
-
"rstrip": false,
|
40 |
-
"single_word": false,
|
41 |
-
"special": true
|
42 |
-
}
|
43 |
-
},
|
44 |
-
"clean_up_tokenization_spaces": true,
|
45 |
"cls_token": "[CLS]",
|
46 |
"do_lower_case": true,
|
47 |
"mask_token": "[MASK]",
|
48 |
"model_max_length": 512,
|
|
|
49 |
"pad_token": "[PAD]",
|
50 |
"sep_token": "[SEP]",
|
|
|
51 |
"strip_accents": null,
|
52 |
"tokenize_chinese_chars": true,
|
53 |
"tokenizer_class": "DistilBertTokenizer",
|
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"cls_token": "[CLS]",
|
3 |
"do_lower_case": true,
|
4 |
"mask_token": "[MASK]",
|
5 |
"model_max_length": 512,
|
6 |
+
"name_or_path": "distilbert-base-uncased",
|
7 |
"pad_token": "[PAD]",
|
8 |
"sep_token": "[SEP]",
|
9 |
+
"special_tokens_map_file": null,
|
10 |
"strip_accents": null,
|
11 |
"tokenize_chinese_chars": true,
|
12 |
"tokenizer_class": "DistilBertTokenizer",
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8aab1ebd59775323e68a4c1858081f20377944583fade8f5ddcd8279ba9773dd
|
3 |
+
size 3960
|