Training in progress, step 6453, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140880624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d43891c5df948afcf4c03d634fb0678427c23d913afad711e13a4663a0525de2
|
3 |
size 1140880624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2281891834
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7be1a78f616202d37598b63e4d1541f350ed7e87a4bc0bf460604eba4b22a57e
|
3 |
size 2281891834
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97aaf044d161c6bbe1f6f4b94ab21a1ef10011613508384aa11192649a62ba61
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:954dab0fd240c75304d0a12263df7af8d924053c8397e45cf55dff7129e94cfe
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -43505,6 +43505,1679 @@
|
|
43505 |
"learning_rate": 8.555207568919529e-05,
|
43506 |
"loss": 0.2022,
|
43507 |
"step": 6214
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43508 |
}
|
43509 |
],
|
43510 |
"logging_steps": 1,
|
@@ -43524,7 +45197,7 @@
|
|
43524 |
"attributes": {}
|
43525 |
}
|
43526 |
},
|
43527 |
-
"total_flos": 3.
|
43528 |
"train_batch_size": 4,
|
43529 |
"trial_name": null,
|
43530 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.2609196680380483,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 6453,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
43505 |
"learning_rate": 8.555207568919529e-05,
|
43506 |
"loss": 0.2022,
|
43507 |
"step": 6214
|
43508 |
+
},
|
43509 |
+
{
|
43510 |
+
"epoch": 0.2512964104844987,
|
43511 |
+
"grad_norm": 5.131168842315674,
|
43512 |
+
"learning_rate": 8.554759119116139e-05,
|
43513 |
+
"loss": 0.1892,
|
43514 |
+
"step": 6215
|
43515 |
+
},
|
43516 |
+
{
|
43517 |
+
"epoch": 0.25133684433976566,
|
43518 |
+
"grad_norm": 3.4465293884277344,
|
43519 |
+
"learning_rate": 8.554310611483727e-05,
|
43520 |
+
"loss": 0.193,
|
43521 |
+
"step": 6216
|
43522 |
+
},
|
43523 |
+
{
|
43524 |
+
"epoch": 0.2513772781950327,
|
43525 |
+
"grad_norm": 1.9942100048065186,
|
43526 |
+
"learning_rate": 8.553862046029593e-05,
|
43527 |
+
"loss": 0.0839,
|
43528 |
+
"step": 6217
|
43529 |
+
},
|
43530 |
+
{
|
43531 |
+
"epoch": 0.2514177120502997,
|
43532 |
+
"grad_norm": 3.042116403579712,
|
43533 |
+
"learning_rate": 8.553413422761032e-05,
|
43534 |
+
"loss": 0.1122,
|
43535 |
+
"step": 6218
|
43536 |
+
},
|
43537 |
+
{
|
43538 |
+
"epoch": 0.25145814590556675,
|
43539 |
+
"grad_norm": 4.3754472732543945,
|
43540 |
+
"learning_rate": 8.552964741685343e-05,
|
43541 |
+
"loss": 0.1772,
|
43542 |
+
"step": 6219
|
43543 |
+
},
|
43544 |
+
{
|
43545 |
+
"epoch": 0.25149857976083373,
|
43546 |
+
"grad_norm": 2.8776440620422363,
|
43547 |
+
"learning_rate": 8.552516002809824e-05,
|
43548 |
+
"loss": 0.0951,
|
43549 |
+
"step": 6220
|
43550 |
+
},
|
43551 |
+
{
|
43552 |
+
"epoch": 0.25153901361610076,
|
43553 |
+
"grad_norm": 3.3530313968658447,
|
43554 |
+
"learning_rate": 8.552067206141776e-05,
|
43555 |
+
"loss": 0.1026,
|
43556 |
+
"step": 6221
|
43557 |
+
},
|
43558 |
+
{
|
43559 |
+
"epoch": 0.2515794474713678,
|
43560 |
+
"grad_norm": 7.831663131713867,
|
43561 |
+
"learning_rate": 8.551618351688502e-05,
|
43562 |
+
"loss": 0.2196,
|
43563 |
+
"step": 6222
|
43564 |
+
},
|
43565 |
+
{
|
43566 |
+
"epoch": 0.25161988132663476,
|
43567 |
+
"grad_norm": 4.250004291534424,
|
43568 |
+
"learning_rate": 8.5511694394573e-05,
|
43569 |
+
"loss": 0.2105,
|
43570 |
+
"step": 6223
|
43571 |
+
},
|
43572 |
+
{
|
43573 |
+
"epoch": 0.2516603151819018,
|
43574 |
+
"grad_norm": 5.238514423370361,
|
43575 |
+
"learning_rate": 8.550720469455476e-05,
|
43576 |
+
"loss": 0.1809,
|
43577 |
+
"step": 6224
|
43578 |
+
},
|
43579 |
+
{
|
43580 |
+
"epoch": 0.2517007490371688,
|
43581 |
+
"grad_norm": 4.478693008422852,
|
43582 |
+
"learning_rate": 8.550271441690333e-05,
|
43583 |
+
"loss": 0.1392,
|
43584 |
+
"step": 6225
|
43585 |
+
},
|
43586 |
+
{
|
43587 |
+
"epoch": 0.25174118289243586,
|
43588 |
+
"grad_norm": 2.730726480484009,
|
43589 |
+
"learning_rate": 8.549822356169174e-05,
|
43590 |
+
"loss": 0.1215,
|
43591 |
+
"step": 6226
|
43592 |
+
},
|
43593 |
+
{
|
43594 |
+
"epoch": 0.25178161674770283,
|
43595 |
+
"grad_norm": 4.786006450653076,
|
43596 |
+
"learning_rate": 8.54937321289931e-05,
|
43597 |
+
"loss": 0.1379,
|
43598 |
+
"step": 6227
|
43599 |
+
},
|
43600 |
+
{
|
43601 |
+
"epoch": 0.25182205060296986,
|
43602 |
+
"grad_norm": 3.0412774085998535,
|
43603 |
+
"learning_rate": 8.548924011888041e-05,
|
43604 |
+
"loss": 0.0681,
|
43605 |
+
"step": 6228
|
43606 |
+
},
|
43607 |
+
{
|
43608 |
+
"epoch": 0.2518624844582369,
|
43609 |
+
"grad_norm": 7.8732500076293945,
|
43610 |
+
"learning_rate": 8.548474753142678e-05,
|
43611 |
+
"loss": 0.2025,
|
43612 |
+
"step": 6229
|
43613 |
+
},
|
43614 |
+
{
|
43615 |
+
"epoch": 0.2519029183135039,
|
43616 |
+
"grad_norm": 5.494780540466309,
|
43617 |
+
"learning_rate": 8.54802543667053e-05,
|
43618 |
+
"loss": 0.1258,
|
43619 |
+
"step": 6230
|
43620 |
+
},
|
43621 |
+
{
|
43622 |
+
"epoch": 0.2519433521687709,
|
43623 |
+
"grad_norm": 4.78334379196167,
|
43624 |
+
"learning_rate": 8.547576062478907e-05,
|
43625 |
+
"loss": 0.306,
|
43626 |
+
"step": 6231
|
43627 |
+
},
|
43628 |
+
{
|
43629 |
+
"epoch": 0.2519837860240379,
|
43630 |
+
"grad_norm": 4.669282913208008,
|
43631 |
+
"learning_rate": 8.547126630575117e-05,
|
43632 |
+
"loss": 0.267,
|
43633 |
+
"step": 6232
|
43634 |
+
},
|
43635 |
+
{
|
43636 |
+
"epoch": 0.25202421987930496,
|
43637 |
+
"grad_norm": 4.810275077819824,
|
43638 |
+
"learning_rate": 8.546677140966473e-05,
|
43639 |
+
"loss": 0.0795,
|
43640 |
+
"step": 6233
|
43641 |
+
},
|
43642 |
+
{
|
43643 |
+
"epoch": 0.25206465373457193,
|
43644 |
+
"grad_norm": 6.018242359161377,
|
43645 |
+
"learning_rate": 8.546227593660286e-05,
|
43646 |
+
"loss": 0.2052,
|
43647 |
+
"step": 6234
|
43648 |
+
},
|
43649 |
+
{
|
43650 |
+
"epoch": 0.25210508758983896,
|
43651 |
+
"grad_norm": 4.984010696411133,
|
43652 |
+
"learning_rate": 8.545777988663873e-05,
|
43653 |
+
"loss": 0.1985,
|
43654 |
+
"step": 6235
|
43655 |
+
},
|
43656 |
+
{
|
43657 |
+
"epoch": 0.252145521445106,
|
43658 |
+
"grad_norm": 6.683145046234131,
|
43659 |
+
"learning_rate": 8.545328325984541e-05,
|
43660 |
+
"loss": 0.0936,
|
43661 |
+
"step": 6236
|
43662 |
+
},
|
43663 |
+
{
|
43664 |
+
"epoch": 0.252185955300373,
|
43665 |
+
"grad_norm": 3.997206449508667,
|
43666 |
+
"learning_rate": 8.544878605629614e-05,
|
43667 |
+
"loss": 0.1981,
|
43668 |
+
"step": 6237
|
43669 |
+
},
|
43670 |
+
{
|
43671 |
+
"epoch": 0.25222638915564,
|
43672 |
+
"grad_norm": 4.37819766998291,
|
43673 |
+
"learning_rate": 8.544428827606401e-05,
|
43674 |
+
"loss": 0.1916,
|
43675 |
+
"step": 6238
|
43676 |
+
},
|
43677 |
+
{
|
43678 |
+
"epoch": 0.252266823010907,
|
43679 |
+
"grad_norm": 6.649611473083496,
|
43680 |
+
"learning_rate": 8.543978991922223e-05,
|
43681 |
+
"loss": 0.1783,
|
43682 |
+
"step": 6239
|
43683 |
+
},
|
43684 |
+
{
|
43685 |
+
"epoch": 0.25230725686617406,
|
43686 |
+
"grad_norm": 2.1568360328674316,
|
43687 |
+
"learning_rate": 8.543529098584395e-05,
|
43688 |
+
"loss": 0.1872,
|
43689 |
+
"step": 6240
|
43690 |
+
},
|
43691 |
+
{
|
43692 |
+
"epoch": 0.2523476907214411,
|
43693 |
+
"grad_norm": 2.9699277877807617,
|
43694 |
+
"learning_rate": 8.543079147600239e-05,
|
43695 |
+
"loss": 0.1311,
|
43696 |
+
"step": 6241
|
43697 |
+
},
|
43698 |
+
{
|
43699 |
+
"epoch": 0.25238812457670806,
|
43700 |
+
"grad_norm": 1.729875087738037,
|
43701 |
+
"learning_rate": 8.542629138977072e-05,
|
43702 |
+
"loss": 0.1097,
|
43703 |
+
"step": 6242
|
43704 |
+
},
|
43705 |
+
{
|
43706 |
+
"epoch": 0.2524285584319751,
|
43707 |
+
"grad_norm": 10.33169174194336,
|
43708 |
+
"learning_rate": 8.542179072722216e-05,
|
43709 |
+
"loss": 0.2242,
|
43710 |
+
"step": 6243
|
43711 |
+
},
|
43712 |
+
{
|
43713 |
+
"epoch": 0.2524689922872421,
|
43714 |
+
"grad_norm": 6.025792121887207,
|
43715 |
+
"learning_rate": 8.541728948842994e-05,
|
43716 |
+
"loss": 0.1638,
|
43717 |
+
"step": 6244
|
43718 |
+
},
|
43719 |
+
{
|
43720 |
+
"epoch": 0.2525094261425091,
|
43721 |
+
"grad_norm": 4.260978698730469,
|
43722 |
+
"learning_rate": 8.541278767346728e-05,
|
43723 |
+
"loss": 0.1998,
|
43724 |
+
"step": 6245
|
43725 |
+
},
|
43726 |
+
{
|
43727 |
+
"epoch": 0.2525498599977761,
|
43728 |
+
"grad_norm": 1.7004098892211914,
|
43729 |
+
"learning_rate": 8.540828528240739e-05,
|
43730 |
+
"loss": 0.0731,
|
43731 |
+
"step": 6246
|
43732 |
+
},
|
43733 |
+
{
|
43734 |
+
"epoch": 0.25259029385304316,
|
43735 |
+
"grad_norm": 4.788060188293457,
|
43736 |
+
"learning_rate": 8.540378231532356e-05,
|
43737 |
+
"loss": 0.0807,
|
43738 |
+
"step": 6247
|
43739 |
+
},
|
43740 |
+
{
|
43741 |
+
"epoch": 0.2526307277083102,
|
43742 |
+
"grad_norm": 5.246776580810547,
|
43743 |
+
"learning_rate": 8.5399278772289e-05,
|
43744 |
+
"loss": 0.1346,
|
43745 |
+
"step": 6248
|
43746 |
+
},
|
43747 |
+
{
|
43748 |
+
"epoch": 0.25267116156357716,
|
43749 |
+
"grad_norm": 8.667241096496582,
|
43750 |
+
"learning_rate": 8.5394774653377e-05,
|
43751 |
+
"loss": 0.2778,
|
43752 |
+
"step": 6249
|
43753 |
+
},
|
43754 |
+
{
|
43755 |
+
"epoch": 0.2527115954188442,
|
43756 |
+
"grad_norm": 2.9778223037719727,
|
43757 |
+
"learning_rate": 8.539026995866082e-05,
|
43758 |
+
"loss": 0.1028,
|
43759 |
+
"step": 6250
|
43760 |
+
},
|
43761 |
+
{
|
43762 |
+
"epoch": 0.2527520292741112,
|
43763 |
+
"grad_norm": 4.117793560028076,
|
43764 |
+
"learning_rate": 8.538576468821375e-05,
|
43765 |
+
"loss": 0.0893,
|
43766 |
+
"step": 6251
|
43767 |
+
},
|
43768 |
+
{
|
43769 |
+
"epoch": 0.25279246312937825,
|
43770 |
+
"grad_norm": 2.23382830619812,
|
43771 |
+
"learning_rate": 8.53812588421091e-05,
|
43772 |
+
"loss": 0.0806,
|
43773 |
+
"step": 6252
|
43774 |
+
},
|
43775 |
+
{
|
43776 |
+
"epoch": 0.25283289698464523,
|
43777 |
+
"grad_norm": 3.395962953567505,
|
43778 |
+
"learning_rate": 8.537675242042014e-05,
|
43779 |
+
"loss": 0.1227,
|
43780 |
+
"step": 6253
|
43781 |
+
},
|
43782 |
+
{
|
43783 |
+
"epoch": 0.25287333083991226,
|
43784 |
+
"grad_norm": 2.672717571258545,
|
43785 |
+
"learning_rate": 8.53722454232202e-05,
|
43786 |
+
"loss": 0.1515,
|
43787 |
+
"step": 6254
|
43788 |
+
},
|
43789 |
+
{
|
43790 |
+
"epoch": 0.2529137646951793,
|
43791 |
+
"grad_norm": 3.2836570739746094,
|
43792 |
+
"learning_rate": 8.536773785058259e-05,
|
43793 |
+
"loss": 0.0952,
|
43794 |
+
"step": 6255
|
43795 |
+
},
|
43796 |
+
{
|
43797 |
+
"epoch": 0.25295419855044626,
|
43798 |
+
"grad_norm": 7.9911980628967285,
|
43799 |
+
"learning_rate": 8.536322970258064e-05,
|
43800 |
+
"loss": 0.2271,
|
43801 |
+
"step": 6256
|
43802 |
+
},
|
43803 |
+
{
|
43804 |
+
"epoch": 0.2529946324057133,
|
43805 |
+
"grad_norm": 3.975504159927368,
|
43806 |
+
"learning_rate": 8.535872097928768e-05,
|
43807 |
+
"loss": 0.1517,
|
43808 |
+
"step": 6257
|
43809 |
+
},
|
43810 |
+
{
|
43811 |
+
"epoch": 0.2530350662609803,
|
43812 |
+
"grad_norm": 8.91515064239502,
|
43813 |
+
"learning_rate": 8.535421168077708e-05,
|
43814 |
+
"loss": 0.2666,
|
43815 |
+
"step": 6258
|
43816 |
+
},
|
43817 |
+
{
|
43818 |
+
"epoch": 0.25307550011624735,
|
43819 |
+
"grad_norm": 4.1537909507751465,
|
43820 |
+
"learning_rate": 8.534970180712221e-05,
|
43821 |
+
"loss": 0.1067,
|
43822 |
+
"step": 6259
|
43823 |
+
},
|
43824 |
+
{
|
43825 |
+
"epoch": 0.25311593397151433,
|
43826 |
+
"grad_norm": 3.3577077388763428,
|
43827 |
+
"learning_rate": 8.534519135839638e-05,
|
43828 |
+
"loss": 0.1172,
|
43829 |
+
"step": 6260
|
43830 |
+
},
|
43831 |
+
{
|
43832 |
+
"epoch": 0.25315636782678136,
|
43833 |
+
"grad_norm": 4.607162952423096,
|
43834 |
+
"learning_rate": 8.534068033467302e-05,
|
43835 |
+
"loss": 0.2664,
|
43836 |
+
"step": 6261
|
43837 |
+
},
|
43838 |
+
{
|
43839 |
+
"epoch": 0.2531968016820484,
|
43840 |
+
"grad_norm": 4.48886251449585,
|
43841 |
+
"learning_rate": 8.53361687360255e-05,
|
43842 |
+
"loss": 0.2104,
|
43843 |
+
"step": 6262
|
43844 |
+
},
|
43845 |
+
{
|
43846 |
+
"epoch": 0.2532372355373154,
|
43847 |
+
"grad_norm": 3.975416898727417,
|
43848 |
+
"learning_rate": 8.533165656252719e-05,
|
43849 |
+
"loss": 0.1517,
|
43850 |
+
"step": 6263
|
43851 |
+
},
|
43852 |
+
{
|
43853 |
+
"epoch": 0.2532776693925824,
|
43854 |
+
"grad_norm": 3.51596736907959,
|
43855 |
+
"learning_rate": 8.532714381425154e-05,
|
43856 |
+
"loss": 0.208,
|
43857 |
+
"step": 6264
|
43858 |
+
},
|
43859 |
+
{
|
43860 |
+
"epoch": 0.2533181032478494,
|
43861 |
+
"grad_norm": 7.504252910614014,
|
43862 |
+
"learning_rate": 8.532263049127192e-05,
|
43863 |
+
"loss": 0.4486,
|
43864 |
+
"step": 6265
|
43865 |
+
},
|
43866 |
+
{
|
43867 |
+
"epoch": 0.25335853710311645,
|
43868 |
+
"grad_norm": 8.218213081359863,
|
43869 |
+
"learning_rate": 8.531811659366178e-05,
|
43870 |
+
"loss": 0.2256,
|
43871 |
+
"step": 6266
|
43872 |
+
},
|
43873 |
+
{
|
43874 |
+
"epoch": 0.25339897095838343,
|
43875 |
+
"grad_norm": 3.353452444076538,
|
43876 |
+
"learning_rate": 8.531360212149455e-05,
|
43877 |
+
"loss": 0.1014,
|
43878 |
+
"step": 6267
|
43879 |
+
},
|
43880 |
+
{
|
43881 |
+
"epoch": 0.25343940481365046,
|
43882 |
+
"grad_norm": 8.242406845092773,
|
43883 |
+
"learning_rate": 8.530908707484367e-05,
|
43884 |
+
"loss": 0.3201,
|
43885 |
+
"step": 6268
|
43886 |
+
},
|
43887 |
+
{
|
43888 |
+
"epoch": 0.2534798386689175,
|
43889 |
+
"grad_norm": 3.9833481311798096,
|
43890 |
+
"learning_rate": 8.530457145378258e-05,
|
43891 |
+
"loss": 0.124,
|
43892 |
+
"step": 6269
|
43893 |
+
},
|
43894 |
+
{
|
43895 |
+
"epoch": 0.2535202725241845,
|
43896 |
+
"grad_norm": 3.9097211360931396,
|
43897 |
+
"learning_rate": 8.530005525838474e-05,
|
43898 |
+
"loss": 0.1897,
|
43899 |
+
"step": 6270
|
43900 |
+
},
|
43901 |
+
{
|
43902 |
+
"epoch": 0.2535607063794515,
|
43903 |
+
"grad_norm": 3.90010666847229,
|
43904 |
+
"learning_rate": 8.529553848872364e-05,
|
43905 |
+
"loss": 0.1575,
|
43906 |
+
"step": 6271
|
43907 |
+
},
|
43908 |
+
{
|
43909 |
+
"epoch": 0.2536011402347185,
|
43910 |
+
"grad_norm": 2.2660040855407715,
|
43911 |
+
"learning_rate": 8.529102114487273e-05,
|
43912 |
+
"loss": 0.0755,
|
43913 |
+
"step": 6272
|
43914 |
+
},
|
43915 |
+
{
|
43916 |
+
"epoch": 0.25364157408998556,
|
43917 |
+
"grad_norm": 5.888993263244629,
|
43918 |
+
"learning_rate": 8.528650322690555e-05,
|
43919 |
+
"loss": 0.189,
|
43920 |
+
"step": 6273
|
43921 |
+
},
|
43922 |
+
{
|
43923 |
+
"epoch": 0.2536820079452526,
|
43924 |
+
"grad_norm": 5.217118263244629,
|
43925 |
+
"learning_rate": 8.528198473489553e-05,
|
43926 |
+
"loss": 0.2706,
|
43927 |
+
"step": 6274
|
43928 |
+
},
|
43929 |
+
{
|
43930 |
+
"epoch": 0.25372244180051956,
|
43931 |
+
"grad_norm": 5.54807186126709,
|
43932 |
+
"learning_rate": 8.527746566891622e-05,
|
43933 |
+
"loss": 0.2454,
|
43934 |
+
"step": 6275
|
43935 |
+
},
|
43936 |
+
{
|
43937 |
+
"epoch": 0.2537628756557866,
|
43938 |
+
"grad_norm": 2.200016498565674,
|
43939 |
+
"learning_rate": 8.527294602904112e-05,
|
43940 |
+
"loss": 0.064,
|
43941 |
+
"step": 6276
|
43942 |
+
},
|
43943 |
+
{
|
43944 |
+
"epoch": 0.2538033095110536,
|
43945 |
+
"grad_norm": 3.341926336288452,
|
43946 |
+
"learning_rate": 8.526842581534376e-05,
|
43947 |
+
"loss": 0.086,
|
43948 |
+
"step": 6277
|
43949 |
+
},
|
43950 |
+
{
|
43951 |
+
"epoch": 0.2538437433663206,
|
43952 |
+
"grad_norm": 5.0617356300354,
|
43953 |
+
"learning_rate": 8.526390502789769e-05,
|
43954 |
+
"loss": 0.1915,
|
43955 |
+
"step": 6278
|
43956 |
+
},
|
43957 |
+
{
|
43958 |
+
"epoch": 0.2538841772215876,
|
43959 |
+
"grad_norm": 4.246654510498047,
|
43960 |
+
"learning_rate": 8.525938366677644e-05,
|
43961 |
+
"loss": 0.2351,
|
43962 |
+
"step": 6279
|
43963 |
+
},
|
43964 |
+
{
|
43965 |
+
"epoch": 0.25392461107685466,
|
43966 |
+
"grad_norm": 2.459105968475342,
|
43967 |
+
"learning_rate": 8.525486173205355e-05,
|
43968 |
+
"loss": 0.135,
|
43969 |
+
"step": 6280
|
43970 |
+
},
|
43971 |
+
{
|
43972 |
+
"epoch": 0.2539650449321217,
|
43973 |
+
"grad_norm": 6.659311771392822,
|
43974 |
+
"learning_rate": 8.525033922380261e-05,
|
43975 |
+
"loss": 0.2173,
|
43976 |
+
"step": 6281
|
43977 |
+
},
|
43978 |
+
{
|
43979 |
+
"epoch": 0.25400547878738866,
|
43980 |
+
"grad_norm": 4.312849521636963,
|
43981 |
+
"learning_rate": 8.524581614209717e-05,
|
43982 |
+
"loss": 0.1036,
|
43983 |
+
"step": 6282
|
43984 |
+
},
|
43985 |
+
{
|
43986 |
+
"epoch": 0.2540459126426557,
|
43987 |
+
"grad_norm": 4.339302062988281,
|
43988 |
+
"learning_rate": 8.524129248701083e-05,
|
43989 |
+
"loss": 0.1592,
|
43990 |
+
"step": 6283
|
43991 |
+
},
|
43992 |
+
{
|
43993 |
+
"epoch": 0.2540863464979227,
|
43994 |
+
"grad_norm": 7.011333465576172,
|
43995 |
+
"learning_rate": 8.523676825861719e-05,
|
43996 |
+
"loss": 0.2355,
|
43997 |
+
"step": 6284
|
43998 |
+
},
|
43999 |
+
{
|
44000 |
+
"epoch": 0.25412678035318975,
|
44001 |
+
"grad_norm": 3.377826690673828,
|
44002 |
+
"learning_rate": 8.523224345698981e-05,
|
44003 |
+
"loss": 0.0733,
|
44004 |
+
"step": 6285
|
44005 |
+
},
|
44006 |
+
{
|
44007 |
+
"epoch": 0.2541672142084567,
|
44008 |
+
"grad_norm": 5.842650890350342,
|
44009 |
+
"learning_rate": 8.522771808220233e-05,
|
44010 |
+
"loss": 0.2519,
|
44011 |
+
"step": 6286
|
44012 |
+
},
|
44013 |
+
{
|
44014 |
+
"epoch": 0.25420764806372376,
|
44015 |
+
"grad_norm": 5.8505659103393555,
|
44016 |
+
"learning_rate": 8.522319213432837e-05,
|
44017 |
+
"loss": 0.1435,
|
44018 |
+
"step": 6287
|
44019 |
+
},
|
44020 |
+
{
|
44021 |
+
"epoch": 0.2542480819189908,
|
44022 |
+
"grad_norm": 5.959987640380859,
|
44023 |
+
"learning_rate": 8.521866561344154e-05,
|
44024 |
+
"loss": 0.2371,
|
44025 |
+
"step": 6288
|
44026 |
+
},
|
44027 |
+
{
|
44028 |
+
"epoch": 0.25428851577425776,
|
44029 |
+
"grad_norm": 2.4413418769836426,
|
44030 |
+
"learning_rate": 8.52141385196155e-05,
|
44031 |
+
"loss": 0.089,
|
44032 |
+
"step": 6289
|
44033 |
+
},
|
44034 |
+
{
|
44035 |
+
"epoch": 0.2543289496295248,
|
44036 |
+
"grad_norm": 4.990113258361816,
|
44037 |
+
"learning_rate": 8.520961085292387e-05,
|
44038 |
+
"loss": 0.1568,
|
44039 |
+
"step": 6290
|
44040 |
+
},
|
44041 |
+
{
|
44042 |
+
"epoch": 0.2543693834847918,
|
44043 |
+
"grad_norm": 7.304047584533691,
|
44044 |
+
"learning_rate": 8.520508261344033e-05,
|
44045 |
+
"loss": 0.2882,
|
44046 |
+
"step": 6291
|
44047 |
+
},
|
44048 |
+
{
|
44049 |
+
"epoch": 0.25440981734005885,
|
44050 |
+
"grad_norm": 3.4831466674804688,
|
44051 |
+
"learning_rate": 8.520055380123855e-05,
|
44052 |
+
"loss": 0.1084,
|
44053 |
+
"step": 6292
|
44054 |
+
},
|
44055 |
+
{
|
44056 |
+
"epoch": 0.2544502511953258,
|
44057 |
+
"grad_norm": 3.8137497901916504,
|
44058 |
+
"learning_rate": 8.519602441639217e-05,
|
44059 |
+
"loss": 0.2194,
|
44060 |
+
"step": 6293
|
44061 |
+
},
|
44062 |
+
{
|
44063 |
+
"epoch": 0.25449068505059286,
|
44064 |
+
"grad_norm": 5.360727310180664,
|
44065 |
+
"learning_rate": 8.519149445897491e-05,
|
44066 |
+
"loss": 0.157,
|
44067 |
+
"step": 6294
|
44068 |
+
},
|
44069 |
+
{
|
44070 |
+
"epoch": 0.2545311189058599,
|
44071 |
+
"grad_norm": 6.933289527893066,
|
44072 |
+
"learning_rate": 8.518696392906045e-05,
|
44073 |
+
"loss": 0.2431,
|
44074 |
+
"step": 6295
|
44075 |
+
},
|
44076 |
+
{
|
44077 |
+
"epoch": 0.2545715527611269,
|
44078 |
+
"grad_norm": 5.3872809410095215,
|
44079 |
+
"learning_rate": 8.518243282672249e-05,
|
44080 |
+
"loss": 0.1341,
|
44081 |
+
"step": 6296
|
44082 |
+
},
|
44083 |
+
{
|
44084 |
+
"epoch": 0.2546119866163939,
|
44085 |
+
"grad_norm": 2.950385093688965,
|
44086 |
+
"learning_rate": 8.517790115203475e-05,
|
44087 |
+
"loss": 0.0747,
|
44088 |
+
"step": 6297
|
44089 |
+
},
|
44090 |
+
{
|
44091 |
+
"epoch": 0.2546524204716609,
|
44092 |
+
"grad_norm": 5.414254665374756,
|
44093 |
+
"learning_rate": 8.517336890507095e-05,
|
44094 |
+
"loss": 0.2068,
|
44095 |
+
"step": 6298
|
44096 |
+
},
|
44097 |
+
{
|
44098 |
+
"epoch": 0.25469285432692795,
|
44099 |
+
"grad_norm": 6.815707206726074,
|
44100 |
+
"learning_rate": 8.516883608590481e-05,
|
44101 |
+
"loss": 0.3236,
|
44102 |
+
"step": 6299
|
44103 |
+
},
|
44104 |
+
{
|
44105 |
+
"epoch": 0.25473328818219493,
|
44106 |
+
"grad_norm": 4.279081344604492,
|
44107 |
+
"learning_rate": 8.516430269461009e-05,
|
44108 |
+
"loss": 0.2014,
|
44109 |
+
"step": 6300
|
44110 |
+
},
|
44111 |
+
{
|
44112 |
+
"epoch": 0.25477372203746196,
|
44113 |
+
"grad_norm": 2.455984592437744,
|
44114 |
+
"learning_rate": 8.515976873126051e-05,
|
44115 |
+
"loss": 0.1983,
|
44116 |
+
"step": 6301
|
44117 |
+
},
|
44118 |
+
{
|
44119 |
+
"epoch": 0.254814155892729,
|
44120 |
+
"grad_norm": 3.6150600910186768,
|
44121 |
+
"learning_rate": 8.515523419592985e-05,
|
44122 |
+
"loss": 0.1326,
|
44123 |
+
"step": 6302
|
44124 |
+
},
|
44125 |
+
{
|
44126 |
+
"epoch": 0.254854589747996,
|
44127 |
+
"grad_norm": 2.019244432449341,
|
44128 |
+
"learning_rate": 8.51506990886919e-05,
|
44129 |
+
"loss": 0.1406,
|
44130 |
+
"step": 6303
|
44131 |
+
},
|
44132 |
+
{
|
44133 |
+
"epoch": 0.254895023603263,
|
44134 |
+
"grad_norm": 6.8846635818481445,
|
44135 |
+
"learning_rate": 8.514616340962038e-05,
|
44136 |
+
"loss": 0.2659,
|
44137 |
+
"step": 6304
|
44138 |
+
},
|
44139 |
+
{
|
44140 |
+
"epoch": 0.25493545745853,
|
44141 |
+
"grad_norm": 3.416940689086914,
|
44142 |
+
"learning_rate": 8.514162715878912e-05,
|
44143 |
+
"loss": 0.2755,
|
44144 |
+
"step": 6305
|
44145 |
+
},
|
44146 |
+
{
|
44147 |
+
"epoch": 0.25497589131379705,
|
44148 |
+
"grad_norm": 4.088443279266357,
|
44149 |
+
"learning_rate": 8.51370903362719e-05,
|
44150 |
+
"loss": 0.0884,
|
44151 |
+
"step": 6306
|
44152 |
+
},
|
44153 |
+
{
|
44154 |
+
"epoch": 0.2550163251690641,
|
44155 |
+
"grad_norm": 1.9420490264892578,
|
44156 |
+
"learning_rate": 8.513255294214255e-05,
|
44157 |
+
"loss": 0.0845,
|
44158 |
+
"step": 6307
|
44159 |
+
},
|
44160 |
+
{
|
44161 |
+
"epoch": 0.25505675902433106,
|
44162 |
+
"grad_norm": 2.7024238109588623,
|
44163 |
+
"learning_rate": 8.512801497647484e-05,
|
44164 |
+
"loss": 0.197,
|
44165 |
+
"step": 6308
|
44166 |
+
},
|
44167 |
+
{
|
44168 |
+
"epoch": 0.2550971928795981,
|
44169 |
+
"grad_norm": 4.50208044052124,
|
44170 |
+
"learning_rate": 8.512347643934264e-05,
|
44171 |
+
"loss": 0.2195,
|
44172 |
+
"step": 6309
|
44173 |
+
},
|
44174 |
+
{
|
44175 |
+
"epoch": 0.2551376267348651,
|
44176 |
+
"grad_norm": 5.4285569190979,
|
44177 |
+
"learning_rate": 8.511893733081975e-05,
|
44178 |
+
"loss": 0.2219,
|
44179 |
+
"step": 6310
|
44180 |
+
},
|
44181 |
+
{
|
44182 |
+
"epoch": 0.2551780605901321,
|
44183 |
+
"grad_norm": 3.334282875061035,
|
44184 |
+
"learning_rate": 8.511439765098001e-05,
|
44185 |
+
"loss": 0.2564,
|
44186 |
+
"step": 6311
|
44187 |
+
},
|
44188 |
+
{
|
44189 |
+
"epoch": 0.2552184944453991,
|
44190 |
+
"grad_norm": 3.280755043029785,
|
44191 |
+
"learning_rate": 8.510985739989731e-05,
|
44192 |
+
"loss": 0.1022,
|
44193 |
+
"step": 6312
|
44194 |
+
},
|
44195 |
+
{
|
44196 |
+
"epoch": 0.25525892830066615,
|
44197 |
+
"grad_norm": 3.1413607597351074,
|
44198 |
+
"learning_rate": 8.510531657764548e-05,
|
44199 |
+
"loss": 0.1204,
|
44200 |
+
"step": 6313
|
44201 |
+
},
|
44202 |
+
{
|
44203 |
+
"epoch": 0.2552993621559332,
|
44204 |
+
"grad_norm": 3.8937203884124756,
|
44205 |
+
"learning_rate": 8.510077518429839e-05,
|
44206 |
+
"loss": 0.1427,
|
44207 |
+
"step": 6314
|
44208 |
+
},
|
44209 |
+
{
|
44210 |
+
"epoch": 0.25533979601120016,
|
44211 |
+
"grad_norm": 9.105998039245605,
|
44212 |
+
"learning_rate": 8.509623321992993e-05,
|
44213 |
+
"loss": 0.2413,
|
44214 |
+
"step": 6315
|
44215 |
+
},
|
44216 |
+
{
|
44217 |
+
"epoch": 0.2553802298664672,
|
44218 |
+
"grad_norm": 3.748194694519043,
|
44219 |
+
"learning_rate": 8.509169068461398e-05,
|
44220 |
+
"loss": 0.2387,
|
44221 |
+
"step": 6316
|
44222 |
+
},
|
44223 |
+
{
|
44224 |
+
"epoch": 0.2554206637217342,
|
44225 |
+
"grad_norm": 2.9352807998657227,
|
44226 |
+
"learning_rate": 8.508714757842445e-05,
|
44227 |
+
"loss": 0.1515,
|
44228 |
+
"step": 6317
|
44229 |
+
},
|
44230 |
+
{
|
44231 |
+
"epoch": 0.25546109757700125,
|
44232 |
+
"grad_norm": 7.0299482345581055,
|
44233 |
+
"learning_rate": 8.508260390143524e-05,
|
44234 |
+
"loss": 0.2198,
|
44235 |
+
"step": 6318
|
44236 |
+
},
|
44237 |
+
{
|
44238 |
+
"epoch": 0.2555015314322682,
|
44239 |
+
"grad_norm": 6.920150279998779,
|
44240 |
+
"learning_rate": 8.507805965372026e-05,
|
44241 |
+
"loss": 0.2549,
|
44242 |
+
"step": 6319
|
44243 |
+
},
|
44244 |
+
{
|
44245 |
+
"epoch": 0.25554196528753526,
|
44246 |
+
"grad_norm": 3.6378939151763916,
|
44247 |
+
"learning_rate": 8.507351483535346e-05,
|
44248 |
+
"loss": 0.1282,
|
44249 |
+
"step": 6320
|
44250 |
+
},
|
44251 |
+
{
|
44252 |
+
"epoch": 0.2555823991428023,
|
44253 |
+
"grad_norm": 3.546814441680908,
|
44254 |
+
"learning_rate": 8.506896944640876e-05,
|
44255 |
+
"loss": 0.2543,
|
44256 |
+
"step": 6321
|
44257 |
+
},
|
44258 |
+
{
|
44259 |
+
"epoch": 0.25562283299806926,
|
44260 |
+
"grad_norm": 5.38877534866333,
|
44261 |
+
"learning_rate": 8.50644234869601e-05,
|
44262 |
+
"loss": 0.126,
|
44263 |
+
"step": 6322
|
44264 |
+
},
|
44265 |
+
{
|
44266 |
+
"epoch": 0.2556632668533363,
|
44267 |
+
"grad_norm": 2.71343994140625,
|
44268 |
+
"learning_rate": 8.505987695708144e-05,
|
44269 |
+
"loss": 0.0831,
|
44270 |
+
"step": 6323
|
44271 |
+
},
|
44272 |
+
{
|
44273 |
+
"epoch": 0.2557037007086033,
|
44274 |
+
"grad_norm": 4.955891132354736,
|
44275 |
+
"learning_rate": 8.505532985684673e-05,
|
44276 |
+
"loss": 0.1422,
|
44277 |
+
"step": 6324
|
44278 |
+
},
|
44279 |
+
{
|
44280 |
+
"epoch": 0.25574413456387035,
|
44281 |
+
"grad_norm": 2.52089786529541,
|
44282 |
+
"learning_rate": 8.505078218632996e-05,
|
44283 |
+
"loss": 0.1084,
|
44284 |
+
"step": 6325
|
44285 |
+
},
|
44286 |
+
{
|
44287 |
+
"epoch": 0.2557845684191373,
|
44288 |
+
"grad_norm": 4.579136848449707,
|
44289 |
+
"learning_rate": 8.504623394560511e-05,
|
44290 |
+
"loss": 0.2748,
|
44291 |
+
"step": 6326
|
44292 |
+
},
|
44293 |
+
{
|
44294 |
+
"epoch": 0.25582500227440436,
|
44295 |
+
"grad_norm": 3.9160637855529785,
|
44296 |
+
"learning_rate": 8.504168513474617e-05,
|
44297 |
+
"loss": 0.1203,
|
44298 |
+
"step": 6327
|
44299 |
+
},
|
44300 |
+
{
|
44301 |
+
"epoch": 0.2558654361296714,
|
44302 |
+
"grad_norm": 4.7934160232543945,
|
44303 |
+
"learning_rate": 8.503713575382712e-05,
|
44304 |
+
"loss": 0.1336,
|
44305 |
+
"step": 6328
|
44306 |
+
},
|
44307 |
+
{
|
44308 |
+
"epoch": 0.2559058699849384,
|
44309 |
+
"grad_norm": 6.95398473739624,
|
44310 |
+
"learning_rate": 8.5032585802922e-05,
|
44311 |
+
"loss": 0.1431,
|
44312 |
+
"step": 6329
|
44313 |
+
},
|
44314 |
+
{
|
44315 |
+
"epoch": 0.2559463038402054,
|
44316 |
+
"grad_norm": 4.516506671905518,
|
44317 |
+
"learning_rate": 8.50280352821048e-05,
|
44318 |
+
"loss": 0.2555,
|
44319 |
+
"step": 6330
|
44320 |
+
},
|
44321 |
+
{
|
44322 |
+
"epoch": 0.2559867376954724,
|
44323 |
+
"grad_norm": 5.366239070892334,
|
44324 |
+
"learning_rate": 8.502348419144959e-05,
|
44325 |
+
"loss": 0.1554,
|
44326 |
+
"step": 6331
|
44327 |
+
},
|
44328 |
+
{
|
44329 |
+
"epoch": 0.25602717155073945,
|
44330 |
+
"grad_norm": 4.950640678405762,
|
44331 |
+
"learning_rate": 8.501893253103036e-05,
|
44332 |
+
"loss": 0.4409,
|
44333 |
+
"step": 6332
|
44334 |
+
},
|
44335 |
+
{
|
44336 |
+
"epoch": 0.2560676054060064,
|
44337 |
+
"grad_norm": 5.238677024841309,
|
44338 |
+
"learning_rate": 8.501438030092119e-05,
|
44339 |
+
"loss": 0.2381,
|
44340 |
+
"step": 6333
|
44341 |
+
},
|
44342 |
+
{
|
44343 |
+
"epoch": 0.25610803926127346,
|
44344 |
+
"grad_norm": 3.452730178833008,
|
44345 |
+
"learning_rate": 8.500982750119613e-05,
|
44346 |
+
"loss": 0.1077,
|
44347 |
+
"step": 6334
|
44348 |
+
},
|
44349 |
+
{
|
44350 |
+
"epoch": 0.2561484731165405,
|
44351 |
+
"grad_norm": 5.175906181335449,
|
44352 |
+
"learning_rate": 8.500527413192922e-05,
|
44353 |
+
"loss": 0.2344,
|
44354 |
+
"step": 6335
|
44355 |
+
},
|
44356 |
+
{
|
44357 |
+
"epoch": 0.2561889069718075,
|
44358 |
+
"grad_norm": 5.024079322814941,
|
44359 |
+
"learning_rate": 8.500072019319456e-05,
|
44360 |
+
"loss": 0.1239,
|
44361 |
+
"step": 6336
|
44362 |
+
},
|
44363 |
+
{
|
44364 |
+
"epoch": 0.2562293408270745,
|
44365 |
+
"grad_norm": 3.608715772628784,
|
44366 |
+
"learning_rate": 8.499616568506624e-05,
|
44367 |
+
"loss": 0.1508,
|
44368 |
+
"step": 6337
|
44369 |
+
},
|
44370 |
+
{
|
44371 |
+
"epoch": 0.2562697746823415,
|
44372 |
+
"grad_norm": 3.933455228805542,
|
44373 |
+
"learning_rate": 8.499161060761831e-05,
|
44374 |
+
"loss": 0.1152,
|
44375 |
+
"step": 6338
|
44376 |
+
},
|
44377 |
+
{
|
44378 |
+
"epoch": 0.25631020853760855,
|
44379 |
+
"grad_norm": 2.304415702819824,
|
44380 |
+
"learning_rate": 8.498705496092492e-05,
|
44381 |
+
"loss": 0.0763,
|
44382 |
+
"step": 6339
|
44383 |
+
},
|
44384 |
+
{
|
44385 |
+
"epoch": 0.2563506423928755,
|
44386 |
+
"grad_norm": 2.713329553604126,
|
44387 |
+
"learning_rate": 8.498249874506016e-05,
|
44388 |
+
"loss": 0.1807,
|
44389 |
+
"step": 6340
|
44390 |
+
},
|
44391 |
+
{
|
44392 |
+
"epoch": 0.25639107624814256,
|
44393 |
+
"grad_norm": 4.124869346618652,
|
44394 |
+
"learning_rate": 8.497794196009816e-05,
|
44395 |
+
"loss": 0.1265,
|
44396 |
+
"step": 6341
|
44397 |
+
},
|
44398 |
+
{
|
44399 |
+
"epoch": 0.2564315101034096,
|
44400 |
+
"grad_norm": 4.125143051147461,
|
44401 |
+
"learning_rate": 8.497338460611303e-05,
|
44402 |
+
"loss": 0.1421,
|
44403 |
+
"step": 6342
|
44404 |
+
},
|
44405 |
+
{
|
44406 |
+
"epoch": 0.2564719439586766,
|
44407 |
+
"grad_norm": 5.568498134613037,
|
44408 |
+
"learning_rate": 8.496882668317896e-05,
|
44409 |
+
"loss": 0.1275,
|
44410 |
+
"step": 6343
|
44411 |
+
},
|
44412 |
+
{
|
44413 |
+
"epoch": 0.2565123778139436,
|
44414 |
+
"grad_norm": 3.043327808380127,
|
44415 |
+
"learning_rate": 8.496426819137003e-05,
|
44416 |
+
"loss": 0.089,
|
44417 |
+
"step": 6344
|
44418 |
+
},
|
44419 |
+
{
|
44420 |
+
"epoch": 0.2565528116692106,
|
44421 |
+
"grad_norm": 1.9587103128433228,
|
44422 |
+
"learning_rate": 8.495970913076043e-05,
|
44423 |
+
"loss": 0.0663,
|
44424 |
+
"step": 6345
|
44425 |
+
},
|
44426 |
+
{
|
44427 |
+
"epoch": 0.25659324552447765,
|
44428 |
+
"grad_norm": 5.768272399902344,
|
44429 |
+
"learning_rate": 8.495514950142433e-05,
|
44430 |
+
"loss": 0.1119,
|
44431 |
+
"step": 6346
|
44432 |
+
},
|
44433 |
+
{
|
44434 |
+
"epoch": 0.2566336793797447,
|
44435 |
+
"grad_norm": 4.563802242279053,
|
44436 |
+
"learning_rate": 8.495058930343592e-05,
|
44437 |
+
"loss": 0.1408,
|
44438 |
+
"step": 6347
|
44439 |
+
},
|
44440 |
+
{
|
44441 |
+
"epoch": 0.25667411323501166,
|
44442 |
+
"grad_norm": 3.0931496620178223,
|
44443 |
+
"learning_rate": 8.494602853686936e-05,
|
44444 |
+
"loss": 0.1668,
|
44445 |
+
"step": 6348
|
44446 |
+
},
|
44447 |
+
{
|
44448 |
+
"epoch": 0.2567145470902787,
|
44449 |
+
"grad_norm": 3.356067657470703,
|
44450 |
+
"learning_rate": 8.494146720179885e-05,
|
44451 |
+
"loss": 0.1318,
|
44452 |
+
"step": 6349
|
44453 |
+
},
|
44454 |
+
{
|
44455 |
+
"epoch": 0.2567549809455457,
|
44456 |
+
"grad_norm": 8.33302116394043,
|
44457 |
+
"learning_rate": 8.493690529829861e-05,
|
44458 |
+
"loss": 0.2704,
|
44459 |
+
"step": 6350
|
44460 |
+
},
|
44461 |
+
{
|
44462 |
+
"epoch": 0.2567954148008127,
|
44463 |
+
"grad_norm": 7.626204013824463,
|
44464 |
+
"learning_rate": 8.493234282644282e-05,
|
44465 |
+
"loss": 0.2377,
|
44466 |
+
"step": 6351
|
44467 |
+
},
|
44468 |
+
{
|
44469 |
+
"epoch": 0.2568358486560797,
|
44470 |
+
"grad_norm": 2.6277921199798584,
|
44471 |
+
"learning_rate": 8.492777978630574e-05,
|
44472 |
+
"loss": 0.2527,
|
44473 |
+
"step": 6352
|
44474 |
+
},
|
44475 |
+
{
|
44476 |
+
"epoch": 0.25687628251134675,
|
44477 |
+
"grad_norm": 1.9599628448486328,
|
44478 |
+
"learning_rate": 8.492321617796157e-05,
|
44479 |
+
"loss": 0.0878,
|
44480 |
+
"step": 6353
|
44481 |
+
},
|
44482 |
+
{
|
44483 |
+
"epoch": 0.2569167163666138,
|
44484 |
+
"grad_norm": 6.0915703773498535,
|
44485 |
+
"learning_rate": 8.491865200148458e-05,
|
44486 |
+
"loss": 0.2377,
|
44487 |
+
"step": 6354
|
44488 |
+
},
|
44489 |
+
{
|
44490 |
+
"epoch": 0.25695715022188076,
|
44491 |
+
"grad_norm": 2.4235754013061523,
|
44492 |
+
"learning_rate": 8.491408725694902e-05,
|
44493 |
+
"loss": 0.07,
|
44494 |
+
"step": 6355
|
44495 |
+
},
|
44496 |
+
{
|
44497 |
+
"epoch": 0.2569975840771478,
|
44498 |
+
"grad_norm": 4.932924747467041,
|
44499 |
+
"learning_rate": 8.490952194442912e-05,
|
44500 |
+
"loss": 0.1753,
|
44501 |
+
"step": 6356
|
44502 |
+
},
|
44503 |
+
{
|
44504 |
+
"epoch": 0.2570380179324148,
|
44505 |
+
"grad_norm": 2.017972707748413,
|
44506 |
+
"learning_rate": 8.490495606399915e-05,
|
44507 |
+
"loss": 0.0895,
|
44508 |
+
"step": 6357
|
44509 |
+
},
|
44510 |
+
{
|
44511 |
+
"epoch": 0.25707845178768185,
|
44512 |
+
"grad_norm": 3.0645298957824707,
|
44513 |
+
"learning_rate": 8.490038961573344e-05,
|
44514 |
+
"loss": 0.1286,
|
44515 |
+
"step": 6358
|
44516 |
+
},
|
44517 |
+
{
|
44518 |
+
"epoch": 0.2571188856429488,
|
44519 |
+
"grad_norm": 6.763504981994629,
|
44520 |
+
"learning_rate": 8.489582259970621e-05,
|
44521 |
+
"loss": 0.1615,
|
44522 |
+
"step": 6359
|
44523 |
+
},
|
44524 |
+
{
|
44525 |
+
"epoch": 0.25715931949821585,
|
44526 |
+
"grad_norm": 10.51475715637207,
|
44527 |
+
"learning_rate": 8.48912550159918e-05,
|
44528 |
+
"loss": 0.2463,
|
44529 |
+
"step": 6360
|
44530 |
+
},
|
44531 |
+
{
|
44532 |
+
"epoch": 0.2571997533534829,
|
44533 |
+
"grad_norm": 6.952187538146973,
|
44534 |
+
"learning_rate": 8.48866868646645e-05,
|
44535 |
+
"loss": 0.2318,
|
44536 |
+
"step": 6361
|
44537 |
+
},
|
44538 |
+
{
|
44539 |
+
"epoch": 0.25724018720874986,
|
44540 |
+
"grad_norm": 7.947668552398682,
|
44541 |
+
"learning_rate": 8.488211814579862e-05,
|
44542 |
+
"loss": 0.1819,
|
44543 |
+
"step": 6362
|
44544 |
+
},
|
44545 |
+
{
|
44546 |
+
"epoch": 0.2572806210640169,
|
44547 |
+
"grad_norm": 5.73841667175293,
|
44548 |
+
"learning_rate": 8.48775488594685e-05,
|
44549 |
+
"loss": 0.1764,
|
44550 |
+
"step": 6363
|
44551 |
+
},
|
44552 |
+
{
|
44553 |
+
"epoch": 0.2573210549192839,
|
44554 |
+
"grad_norm": 5.9241533279418945,
|
44555 |
+
"learning_rate": 8.487297900574847e-05,
|
44556 |
+
"loss": 0.1824,
|
44557 |
+
"step": 6364
|
44558 |
+
},
|
44559 |
+
{
|
44560 |
+
"epoch": 0.25736148877455095,
|
44561 |
+
"grad_norm": 5.1504387855529785,
|
44562 |
+
"learning_rate": 8.486840858471286e-05,
|
44563 |
+
"loss": 0.1751,
|
44564 |
+
"step": 6365
|
44565 |
+
},
|
44566 |
+
{
|
44567 |
+
"epoch": 0.2574019226298179,
|
44568 |
+
"grad_norm": 5.617988109588623,
|
44569 |
+
"learning_rate": 8.486383759643602e-05,
|
44570 |
+
"loss": 0.1754,
|
44571 |
+
"step": 6366
|
44572 |
+
},
|
44573 |
+
{
|
44574 |
+
"epoch": 0.25744235648508496,
|
44575 |
+
"grad_norm": 4.458637237548828,
|
44576 |
+
"learning_rate": 8.485926604099233e-05,
|
44577 |
+
"loss": 0.1956,
|
44578 |
+
"step": 6367
|
44579 |
+
},
|
44580 |
+
{
|
44581 |
+
"epoch": 0.257482790340352,
|
44582 |
+
"grad_norm": 5.067800998687744,
|
44583 |
+
"learning_rate": 8.485469391845615e-05,
|
44584 |
+
"loss": 0.1718,
|
44585 |
+
"step": 6368
|
44586 |
+
},
|
44587 |
+
{
|
44588 |
+
"epoch": 0.257523224195619,
|
44589 |
+
"grad_norm": 3.406689167022705,
|
44590 |
+
"learning_rate": 8.485012122890187e-05,
|
44591 |
+
"loss": 0.1398,
|
44592 |
+
"step": 6369
|
44593 |
+
},
|
44594 |
+
{
|
44595 |
+
"epoch": 0.257563658050886,
|
44596 |
+
"grad_norm": 9.782722473144531,
|
44597 |
+
"learning_rate": 8.484554797240387e-05,
|
44598 |
+
"loss": 0.2298,
|
44599 |
+
"step": 6370
|
44600 |
+
},
|
44601 |
+
{
|
44602 |
+
"epoch": 0.257604091906153,
|
44603 |
+
"grad_norm": 16.46589469909668,
|
44604 |
+
"learning_rate": 8.484097414903654e-05,
|
44605 |
+
"loss": 0.3264,
|
44606 |
+
"step": 6371
|
44607 |
+
},
|
44608 |
+
{
|
44609 |
+
"epoch": 0.25764452576142005,
|
44610 |
+
"grad_norm": 3.9720211029052734,
|
44611 |
+
"learning_rate": 8.483639975887429e-05,
|
44612 |
+
"loss": 0.1507,
|
44613 |
+
"step": 6372
|
44614 |
+
},
|
44615 |
+
{
|
44616 |
+
"epoch": 0.257684959616687,
|
44617 |
+
"grad_norm": 2.8719277381896973,
|
44618 |
+
"learning_rate": 8.483182480199155e-05,
|
44619 |
+
"loss": 0.0907,
|
44620 |
+
"step": 6373
|
44621 |
+
},
|
44622 |
+
{
|
44623 |
+
"epoch": 0.25772539347195406,
|
44624 |
+
"grad_norm": 1.6157563924789429,
|
44625 |
+
"learning_rate": 8.482724927846273e-05,
|
44626 |
+
"loss": 0.0721,
|
44627 |
+
"step": 6374
|
44628 |
+
},
|
44629 |
+
{
|
44630 |
+
"epoch": 0.2577658273272211,
|
44631 |
+
"grad_norm": 1.839153528213501,
|
44632 |
+
"learning_rate": 8.482267318836225e-05,
|
44633 |
+
"loss": 0.1382,
|
44634 |
+
"step": 6375
|
44635 |
+
},
|
44636 |
+
{
|
44637 |
+
"epoch": 0.2578062611824881,
|
44638 |
+
"grad_norm": 4.852220058441162,
|
44639 |
+
"learning_rate": 8.481809653176462e-05,
|
44640 |
+
"loss": 0.2959,
|
44641 |
+
"step": 6376
|
44642 |
+
},
|
44643 |
+
{
|
44644 |
+
"epoch": 0.2578466950377551,
|
44645 |
+
"grad_norm": 2.376772165298462,
|
44646 |
+
"learning_rate": 8.481351930874423e-05,
|
44647 |
+
"loss": 0.0954,
|
44648 |
+
"step": 6377
|
44649 |
+
},
|
44650 |
+
{
|
44651 |
+
"epoch": 0.2578871288930221,
|
44652 |
+
"grad_norm": 2.801809549331665,
|
44653 |
+
"learning_rate": 8.480894151937556e-05,
|
44654 |
+
"loss": 0.1136,
|
44655 |
+
"step": 6378
|
44656 |
+
},
|
44657 |
+
{
|
44658 |
+
"epoch": 0.25792756274828915,
|
44659 |
+
"grad_norm": 2.7802624702453613,
|
44660 |
+
"learning_rate": 8.480436316373308e-05,
|
44661 |
+
"loss": 0.1464,
|
44662 |
+
"step": 6379
|
44663 |
+
},
|
44664 |
+
{
|
44665 |
+
"epoch": 0.2579679966035562,
|
44666 |
+
"grad_norm": 3.514580249786377,
|
44667 |
+
"learning_rate": 8.479978424189128e-05,
|
44668 |
+
"loss": 0.0766,
|
44669 |
+
"step": 6380
|
44670 |
+
},
|
44671 |
+
{
|
44672 |
+
"epoch": 0.25800843045882316,
|
44673 |
+
"grad_norm": 3.6027603149414062,
|
44674 |
+
"learning_rate": 8.479520475392463e-05,
|
44675 |
+
"loss": 0.2039,
|
44676 |
+
"step": 6381
|
44677 |
+
},
|
44678 |
+
{
|
44679 |
+
"epoch": 0.2580488643140902,
|
44680 |
+
"grad_norm": 3.665100336074829,
|
44681 |
+
"learning_rate": 8.479062469990767e-05,
|
44682 |
+
"loss": 0.1421,
|
44683 |
+
"step": 6382
|
44684 |
+
},
|
44685 |
+
{
|
44686 |
+
"epoch": 0.2580892981693572,
|
44687 |
+
"grad_norm": 2.225731611251831,
|
44688 |
+
"learning_rate": 8.478604407991486e-05,
|
44689 |
+
"loss": 0.0943,
|
44690 |
+
"step": 6383
|
44691 |
+
},
|
44692 |
+
{
|
44693 |
+
"epoch": 0.2581297320246242,
|
44694 |
+
"grad_norm": 5.70697021484375,
|
44695 |
+
"learning_rate": 8.478146289402074e-05,
|
44696 |
+
"loss": 0.1902,
|
44697 |
+
"step": 6384
|
44698 |
+
},
|
44699 |
+
{
|
44700 |
+
"epoch": 0.2581701658798912,
|
44701 |
+
"grad_norm": 10.20688247680664,
|
44702 |
+
"learning_rate": 8.477688114229985e-05,
|
44703 |
+
"loss": 0.3306,
|
44704 |
+
"step": 6385
|
44705 |
+
},
|
44706 |
+
{
|
44707 |
+
"epoch": 0.25821059973515825,
|
44708 |
+
"grad_norm": 5.550793647766113,
|
44709 |
+
"learning_rate": 8.477229882482671e-05,
|
44710 |
+
"loss": 0.1043,
|
44711 |
+
"step": 6386
|
44712 |
+
},
|
44713 |
+
{
|
44714 |
+
"epoch": 0.2582510335904253,
|
44715 |
+
"grad_norm": 5.5182600021362305,
|
44716 |
+
"learning_rate": 8.476771594167585e-05,
|
44717 |
+
"loss": 0.1728,
|
44718 |
+
"step": 6387
|
44719 |
+
},
|
44720 |
+
{
|
44721 |
+
"epoch": 0.25829146744569226,
|
44722 |
+
"grad_norm": 7.05347204208374,
|
44723 |
+
"learning_rate": 8.476313249292187e-05,
|
44724 |
+
"loss": 0.1392,
|
44725 |
+
"step": 6388
|
44726 |
+
},
|
44727 |
+
{
|
44728 |
+
"epoch": 0.2583319013009593,
|
44729 |
+
"grad_norm": 4.397397041320801,
|
44730 |
+
"learning_rate": 8.475854847863929e-05,
|
44731 |
+
"loss": 0.096,
|
44732 |
+
"step": 6389
|
44733 |
+
},
|
44734 |
+
{
|
44735 |
+
"epoch": 0.2583723351562263,
|
44736 |
+
"grad_norm": 5.273245334625244,
|
44737 |
+
"learning_rate": 8.47539638989027e-05,
|
44738 |
+
"loss": 0.1418,
|
44739 |
+
"step": 6390
|
44740 |
+
},
|
44741 |
+
{
|
44742 |
+
"epoch": 0.25841276901149335,
|
44743 |
+
"grad_norm": 4.012918472290039,
|
44744 |
+
"learning_rate": 8.474937875378669e-05,
|
44745 |
+
"loss": 0.1406,
|
44746 |
+
"step": 6391
|
44747 |
+
},
|
44748 |
+
{
|
44749 |
+
"epoch": 0.2584532028667603,
|
44750 |
+
"grad_norm": 4.039473056793213,
|
44751 |
+
"learning_rate": 8.474479304336584e-05,
|
44752 |
+
"loss": 0.1272,
|
44753 |
+
"step": 6392
|
44754 |
+
},
|
44755 |
+
{
|
44756 |
+
"epoch": 0.25849363672202735,
|
44757 |
+
"grad_norm": 2.7828409671783447,
|
44758 |
+
"learning_rate": 8.474020676771476e-05,
|
44759 |
+
"loss": 0.0875,
|
44760 |
+
"step": 6393
|
44761 |
+
},
|
44762 |
+
{
|
44763 |
+
"epoch": 0.2585340705772944,
|
44764 |
+
"grad_norm": 3.8492186069488525,
|
44765 |
+
"learning_rate": 8.473561992690805e-05,
|
44766 |
+
"loss": 0.1361,
|
44767 |
+
"step": 6394
|
44768 |
+
},
|
44769 |
+
{
|
44770 |
+
"epoch": 0.25857450443256136,
|
44771 |
+
"grad_norm": 5.878203868865967,
|
44772 |
+
"learning_rate": 8.473103252102033e-05,
|
44773 |
+
"loss": 0.2268,
|
44774 |
+
"step": 6395
|
44775 |
+
},
|
44776 |
+
{
|
44777 |
+
"epoch": 0.2586149382878284,
|
44778 |
+
"grad_norm": 2.3049988746643066,
|
44779 |
+
"learning_rate": 8.472644455012623e-05,
|
44780 |
+
"loss": 0.0793,
|
44781 |
+
"step": 6396
|
44782 |
+
},
|
44783 |
+
{
|
44784 |
+
"epoch": 0.2586553721430954,
|
44785 |
+
"grad_norm": 4.549858093261719,
|
44786 |
+
"learning_rate": 8.47218560143004e-05,
|
44787 |
+
"loss": 0.0883,
|
44788 |
+
"step": 6397
|
44789 |
+
},
|
44790 |
+
{
|
44791 |
+
"epoch": 0.25869580599836245,
|
44792 |
+
"grad_norm": 3.8747036457061768,
|
44793 |
+
"learning_rate": 8.471726691361747e-05,
|
44794 |
+
"loss": 0.1501,
|
44795 |
+
"step": 6398
|
44796 |
+
},
|
44797 |
+
{
|
44798 |
+
"epoch": 0.2587362398536294,
|
44799 |
+
"grad_norm": 6.015817642211914,
|
44800 |
+
"learning_rate": 8.471267724815209e-05,
|
44801 |
+
"loss": 0.2054,
|
44802 |
+
"step": 6399
|
44803 |
+
},
|
44804 |
+
{
|
44805 |
+
"epoch": 0.25877667370889645,
|
44806 |
+
"grad_norm": 12.059225082397461,
|
44807 |
+
"learning_rate": 8.470808701797894e-05,
|
44808 |
+
"loss": 0.2836,
|
44809 |
+
"step": 6400
|
44810 |
+
},
|
44811 |
+
{
|
44812 |
+
"epoch": 0.2588171075641635,
|
44813 |
+
"grad_norm": 7.788859844207764,
|
44814 |
+
"learning_rate": 8.470349622317269e-05,
|
44815 |
+
"loss": 0.2607,
|
44816 |
+
"step": 6401
|
44817 |
+
},
|
44818 |
+
{
|
44819 |
+
"epoch": 0.2588575414194305,
|
44820 |
+
"grad_norm": 4.372074604034424,
|
44821 |
+
"learning_rate": 8.469890486380802e-05,
|
44822 |
+
"loss": 0.2265,
|
44823 |
+
"step": 6402
|
44824 |
+
},
|
44825 |
+
{
|
44826 |
+
"epoch": 0.2588979752746975,
|
44827 |
+
"grad_norm": 5.639162540435791,
|
44828 |
+
"learning_rate": 8.469431293995964e-05,
|
44829 |
+
"loss": 0.1593,
|
44830 |
+
"step": 6403
|
44831 |
+
},
|
44832 |
+
{
|
44833 |
+
"epoch": 0.2589384091299645,
|
44834 |
+
"grad_norm": 5.578627109527588,
|
44835 |
+
"learning_rate": 8.468972045170223e-05,
|
44836 |
+
"loss": 0.1215,
|
44837 |
+
"step": 6404
|
44838 |
+
},
|
44839 |
+
{
|
44840 |
+
"epoch": 0.25897884298523155,
|
44841 |
+
"grad_norm": 5.033603191375732,
|
44842 |
+
"learning_rate": 8.468512739911051e-05,
|
44843 |
+
"loss": 0.2522,
|
44844 |
+
"step": 6405
|
44845 |
+
},
|
44846 |
+
{
|
44847 |
+
"epoch": 0.2590192768404985,
|
44848 |
+
"grad_norm": 5.399876594543457,
|
44849 |
+
"learning_rate": 8.468053378225919e-05,
|
44850 |
+
"loss": 0.1563,
|
44851 |
+
"step": 6406
|
44852 |
+
},
|
44853 |
+
{
|
44854 |
+
"epoch": 0.25905971069576555,
|
44855 |
+
"grad_norm": 5.1479315757751465,
|
44856 |
+
"learning_rate": 8.467593960122302e-05,
|
44857 |
+
"loss": 0.0807,
|
44858 |
+
"step": 6407
|
44859 |
+
},
|
44860 |
+
{
|
44861 |
+
"epoch": 0.2591001445510326,
|
44862 |
+
"grad_norm": 2.96193265914917,
|
44863 |
+
"learning_rate": 8.467134485607671e-05,
|
44864 |
+
"loss": 0.1433,
|
44865 |
+
"step": 6408
|
44866 |
+
},
|
44867 |
+
{
|
44868 |
+
"epoch": 0.2591405784062996,
|
44869 |
+
"grad_norm": 8.882711410522461,
|
44870 |
+
"learning_rate": 8.466674954689504e-05,
|
44871 |
+
"loss": 0.2025,
|
44872 |
+
"step": 6409
|
44873 |
+
},
|
44874 |
+
{
|
44875 |
+
"epoch": 0.2591810122615666,
|
44876 |
+
"grad_norm": 6.4518914222717285,
|
44877 |
+
"learning_rate": 8.466215367375274e-05,
|
44878 |
+
"loss": 0.1386,
|
44879 |
+
"step": 6410
|
44880 |
+
},
|
44881 |
+
{
|
44882 |
+
"epoch": 0.2592214461168336,
|
44883 |
+
"grad_norm": 2.5367136001586914,
|
44884 |
+
"learning_rate": 8.465755723672459e-05,
|
44885 |
+
"loss": 0.1707,
|
44886 |
+
"step": 6411
|
44887 |
+
},
|
44888 |
+
{
|
44889 |
+
"epoch": 0.25926187997210065,
|
44890 |
+
"grad_norm": 6.0566182136535645,
|
44891 |
+
"learning_rate": 8.465296023588536e-05,
|
44892 |
+
"loss": 0.2651,
|
44893 |
+
"step": 6412
|
44894 |
+
},
|
44895 |
+
{
|
44896 |
+
"epoch": 0.2593023138273677,
|
44897 |
+
"grad_norm": 4.471607208251953,
|
44898 |
+
"learning_rate": 8.464836267130982e-05,
|
44899 |
+
"loss": 0.2542,
|
44900 |
+
"step": 6413
|
44901 |
+
},
|
44902 |
+
{
|
44903 |
+
"epoch": 0.25934274768263466,
|
44904 |
+
"grad_norm": 8.036651611328125,
|
44905 |
+
"learning_rate": 8.46437645430728e-05,
|
44906 |
+
"loss": 0.1938,
|
44907 |
+
"step": 6414
|
44908 |
+
},
|
44909 |
+
{
|
44910 |
+
"epoch": 0.2593831815379017,
|
44911 |
+
"grad_norm": 6.1860833168029785,
|
44912 |
+
"learning_rate": 8.463916585124908e-05,
|
44913 |
+
"loss": 0.3583,
|
44914 |
+
"step": 6415
|
44915 |
+
},
|
44916 |
+
{
|
44917 |
+
"epoch": 0.2594236153931687,
|
44918 |
+
"grad_norm": 2.4040839672088623,
|
44919 |
+
"learning_rate": 8.463456659591347e-05,
|
44920 |
+
"loss": 0.0842,
|
44921 |
+
"step": 6416
|
44922 |
+
},
|
44923 |
+
{
|
44924 |
+
"epoch": 0.2594640492484357,
|
44925 |
+
"grad_norm": 4.007444858551025,
|
44926 |
+
"learning_rate": 8.462996677714078e-05,
|
44927 |
+
"loss": 0.2408,
|
44928 |
+
"step": 6417
|
44929 |
+
},
|
44930 |
+
{
|
44931 |
+
"epoch": 0.2595044831037027,
|
44932 |
+
"grad_norm": 2.925011157989502,
|
44933 |
+
"learning_rate": 8.462536639500587e-05,
|
44934 |
+
"loss": 0.1108,
|
44935 |
+
"step": 6418
|
44936 |
+
},
|
44937 |
+
{
|
44938 |
+
"epoch": 0.25954491695896975,
|
44939 |
+
"grad_norm": 8.07170581817627,
|
44940 |
+
"learning_rate": 8.462076544958357e-05,
|
44941 |
+
"loss": 0.2668,
|
44942 |
+
"step": 6419
|
44943 |
+
},
|
44944 |
+
{
|
44945 |
+
"epoch": 0.2595853508142368,
|
44946 |
+
"grad_norm": 5.603867053985596,
|
44947 |
+
"learning_rate": 8.461616394094871e-05,
|
44948 |
+
"loss": 0.189,
|
44949 |
+
"step": 6420
|
44950 |
+
},
|
44951 |
+
{
|
44952 |
+
"epoch": 0.25962578466950376,
|
44953 |
+
"grad_norm": 3.7268362045288086,
|
44954 |
+
"learning_rate": 8.461156186917617e-05,
|
44955 |
+
"loss": 0.0929,
|
44956 |
+
"step": 6421
|
44957 |
+
},
|
44958 |
+
{
|
44959 |
+
"epoch": 0.2596662185247708,
|
44960 |
+
"grad_norm": 4.756371021270752,
|
44961 |
+
"learning_rate": 8.460695923434079e-05,
|
44962 |
+
"loss": 0.1305,
|
44963 |
+
"step": 6422
|
44964 |
+
},
|
44965 |
+
{
|
44966 |
+
"epoch": 0.2597066523800378,
|
44967 |
+
"grad_norm": 7.001537322998047,
|
44968 |
+
"learning_rate": 8.460235603651746e-05,
|
44969 |
+
"loss": 0.2624,
|
44970 |
+
"step": 6423
|
44971 |
+
},
|
44972 |
+
{
|
44973 |
+
"epoch": 0.25974708623530485,
|
44974 |
+
"grad_norm": 3.183443784713745,
|
44975 |
+
"learning_rate": 8.459775227578109e-05,
|
44976 |
+
"loss": 0.2155,
|
44977 |
+
"step": 6424
|
44978 |
+
},
|
44979 |
+
{
|
44980 |
+
"epoch": 0.2597875200905718,
|
44981 |
+
"grad_norm": 3.8235130310058594,
|
44982 |
+
"learning_rate": 8.459314795220652e-05,
|
44983 |
+
"loss": 0.2506,
|
44984 |
+
"step": 6425
|
44985 |
+
},
|
44986 |
+
{
|
44987 |
+
"epoch": 0.25982795394583885,
|
44988 |
+
"grad_norm": 2.616227149963379,
|
44989 |
+
"learning_rate": 8.458854306586872e-05,
|
44990 |
+
"loss": 0.1201,
|
44991 |
+
"step": 6426
|
44992 |
+
},
|
44993 |
+
{
|
44994 |
+
"epoch": 0.2598683878011059,
|
44995 |
+
"grad_norm": 1.2675552368164062,
|
44996 |
+
"learning_rate": 8.458393761684255e-05,
|
44997 |
+
"loss": 0.0644,
|
44998 |
+
"step": 6427
|
44999 |
+
},
|
45000 |
+
{
|
45001 |
+
"epoch": 0.25990882165637286,
|
45002 |
+
"grad_norm": 3.470249652862549,
|
45003 |
+
"learning_rate": 8.457933160520295e-05,
|
45004 |
+
"loss": 0.1924,
|
45005 |
+
"step": 6428
|
45006 |
+
},
|
45007 |
+
{
|
45008 |
+
"epoch": 0.2599492555116399,
|
45009 |
+
"grad_norm": 3.0673654079437256,
|
45010 |
+
"learning_rate": 8.457472503102484e-05,
|
45011 |
+
"loss": 0.1778,
|
45012 |
+
"step": 6429
|
45013 |
+
},
|
45014 |
+
{
|
45015 |
+
"epoch": 0.2599896893669069,
|
45016 |
+
"grad_norm": 5.870110511779785,
|
45017 |
+
"learning_rate": 8.457011789438319e-05,
|
45018 |
+
"loss": 0.3368,
|
45019 |
+
"step": 6430
|
45020 |
+
},
|
45021 |
+
{
|
45022 |
+
"epoch": 0.26003012322217395,
|
45023 |
+
"grad_norm": 2.752155303955078,
|
45024 |
+
"learning_rate": 8.456551019535293e-05,
|
45025 |
+
"loss": 0.1311,
|
45026 |
+
"step": 6431
|
45027 |
+
},
|
45028 |
+
{
|
45029 |
+
"epoch": 0.2600705570774409,
|
45030 |
+
"grad_norm": 4.4634833335876465,
|
45031 |
+
"learning_rate": 8.4560901934009e-05,
|
45032 |
+
"loss": 0.2769,
|
45033 |
+
"step": 6432
|
45034 |
+
},
|
45035 |
+
{
|
45036 |
+
"epoch": 0.26011099093270795,
|
45037 |
+
"grad_norm": 5.72117280960083,
|
45038 |
+
"learning_rate": 8.455629311042639e-05,
|
45039 |
+
"loss": 0.1678,
|
45040 |
+
"step": 6433
|
45041 |
+
},
|
45042 |
+
{
|
45043 |
+
"epoch": 0.260151424787975,
|
45044 |
+
"grad_norm": 3.4487555027008057,
|
45045 |
+
"learning_rate": 8.455168372468007e-05,
|
45046 |
+
"loss": 0.1195,
|
45047 |
+
"step": 6434
|
45048 |
+
},
|
45049 |
+
{
|
45050 |
+
"epoch": 0.260191858643242,
|
45051 |
+
"grad_norm": 2.938300132751465,
|
45052 |
+
"learning_rate": 8.454707377684503e-05,
|
45053 |
+
"loss": 0.1321,
|
45054 |
+
"step": 6435
|
45055 |
+
},
|
45056 |
+
{
|
45057 |
+
"epoch": 0.260232292498509,
|
45058 |
+
"grad_norm": 4.485689640045166,
|
45059 |
+
"learning_rate": 8.454246326699626e-05,
|
45060 |
+
"loss": 0.1388,
|
45061 |
+
"step": 6436
|
45062 |
+
},
|
45063 |
+
{
|
45064 |
+
"epoch": 0.260272726353776,
|
45065 |
+
"grad_norm": 11.9823637008667,
|
45066 |
+
"learning_rate": 8.453785219520878e-05,
|
45067 |
+
"loss": 0.2942,
|
45068 |
+
"step": 6437
|
45069 |
+
},
|
45070 |
+
{
|
45071 |
+
"epoch": 0.26031316020904305,
|
45072 |
+
"grad_norm": 5.374208927154541,
|
45073 |
+
"learning_rate": 8.453324056155758e-05,
|
45074 |
+
"loss": 0.3151,
|
45075 |
+
"step": 6438
|
45076 |
+
},
|
45077 |
+
{
|
45078 |
+
"epoch": 0.26035359406431,
|
45079 |
+
"grad_norm": 6.791523456573486,
|
45080 |
+
"learning_rate": 8.452862836611768e-05,
|
45081 |
+
"loss": 0.1854,
|
45082 |
+
"step": 6439
|
45083 |
+
},
|
45084 |
+
{
|
45085 |
+
"epoch": 0.26039402791957705,
|
45086 |
+
"grad_norm": 3.6393191814422607,
|
45087 |
+
"learning_rate": 8.452401560896415e-05,
|
45088 |
+
"loss": 0.1342,
|
45089 |
+
"step": 6440
|
45090 |
+
},
|
45091 |
+
{
|
45092 |
+
"epoch": 0.2604344617748441,
|
45093 |
+
"grad_norm": 3.012181282043457,
|
45094 |
+
"learning_rate": 8.4519402290172e-05,
|
45095 |
+
"loss": 0.1095,
|
45096 |
+
"step": 6441
|
45097 |
+
},
|
45098 |
+
{
|
45099 |
+
"epoch": 0.2604748956301111,
|
45100 |
+
"grad_norm": 5.776113033294678,
|
45101 |
+
"learning_rate": 8.451478840981626e-05,
|
45102 |
+
"loss": 0.1903,
|
45103 |
+
"step": 6442
|
45104 |
+
},
|
45105 |
+
{
|
45106 |
+
"epoch": 0.2605153294853781,
|
45107 |
+
"grad_norm": 2.5943546295166016,
|
45108 |
+
"learning_rate": 8.451017396797202e-05,
|
45109 |
+
"loss": 0.0931,
|
45110 |
+
"step": 6443
|
45111 |
+
},
|
45112 |
+
{
|
45113 |
+
"epoch": 0.2605557633406451,
|
45114 |
+
"grad_norm": 2.9449124336242676,
|
45115 |
+
"learning_rate": 8.450555896471434e-05,
|
45116 |
+
"loss": 0.1039,
|
45117 |
+
"step": 6444
|
45118 |
+
},
|
45119 |
+
{
|
45120 |
+
"epoch": 0.26059619719591215,
|
45121 |
+
"grad_norm": 2.590688943862915,
|
45122 |
+
"learning_rate": 8.450094340011829e-05,
|
45123 |
+
"loss": 0.1321,
|
45124 |
+
"step": 6445
|
45125 |
+
},
|
45126 |
+
{
|
45127 |
+
"epoch": 0.2606366310511792,
|
45128 |
+
"grad_norm": 5.73232364654541,
|
45129 |
+
"learning_rate": 8.449632727425899e-05,
|
45130 |
+
"loss": 0.1826,
|
45131 |
+
"step": 6446
|
45132 |
+
},
|
45133 |
+
{
|
45134 |
+
"epoch": 0.26067706490644615,
|
45135 |
+
"grad_norm": 5.102660655975342,
|
45136 |
+
"learning_rate": 8.449171058721151e-05,
|
45137 |
+
"loss": 0.2592,
|
45138 |
+
"step": 6447
|
45139 |
+
},
|
45140 |
+
{
|
45141 |
+
"epoch": 0.2607174987617132,
|
45142 |
+
"grad_norm": 6.164446830749512,
|
45143 |
+
"learning_rate": 8.448709333905092e-05,
|
45144 |
+
"loss": 0.2107,
|
45145 |
+
"step": 6448
|
45146 |
+
},
|
45147 |
+
{
|
45148 |
+
"epoch": 0.2607579326169802,
|
45149 |
+
"grad_norm": 5.607487678527832,
|
45150 |
+
"learning_rate": 8.448247552985237e-05,
|
45151 |
+
"loss": 0.1682,
|
45152 |
+
"step": 6449
|
45153 |
+
},
|
45154 |
+
{
|
45155 |
+
"epoch": 0.2607983664722472,
|
45156 |
+
"grad_norm": 3.105562686920166,
|
45157 |
+
"learning_rate": 8.447785715969102e-05,
|
45158 |
+
"loss": 0.1193,
|
45159 |
+
"step": 6450
|
45160 |
+
},
|
45161 |
+
{
|
45162 |
+
"epoch": 0.2608388003275142,
|
45163 |
+
"grad_norm": 3.8859353065490723,
|
45164 |
+
"learning_rate": 8.447323822864192e-05,
|
45165 |
+
"loss": 0.1914,
|
45166 |
+
"step": 6451
|
45167 |
+
},
|
45168 |
+
{
|
45169 |
+
"epoch": 0.26087923418278125,
|
45170 |
+
"grad_norm": 3.000746726989746,
|
45171 |
+
"learning_rate": 8.44686187367803e-05,
|
45172 |
+
"loss": 0.15,
|
45173 |
+
"step": 6452
|
45174 |
+
},
|
45175 |
+
{
|
45176 |
+
"epoch": 0.2609196680380483,
|
45177 |
+
"grad_norm": 7.3277716636657715,
|
45178 |
+
"learning_rate": 8.446399868418122e-05,
|
45179 |
+
"loss": 0.2542,
|
45180 |
+
"step": 6453
|
45181 |
}
|
45182 |
],
|
45183 |
"logging_steps": 1,
|
|
|
45197 |
"attributes": {}
|
45198 |
}
|
45199 |
},
|
45200 |
+
"total_flos": 3.991235411878871e+17,
|
45201 |
"train_batch_size": 4,
|
45202 |
"trial_name": null,
|
45203 |
"trial_params": null
|