Training in progress, step 5975, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140880624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e703ad2f82ea35ef5a7aee6880c146bd3e51bf1be78dc85b1dcd547fec470cb
|
3 |
size 1140880624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2281891834
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9efd81bd9c7efd5835fc54cf2c839b58e6f9a7379aad0fae94d16876b33bb003
|
3 |
size 2281891834
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:430c3bf0b3e6f4b096445ed48b02979db0a8c413b8ab7576fa7f2654a9ca9dcf
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdd4c22859701fe01c2bcb9dda743cc0aefb30cedcf44f10dadf647d61c3ff9d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -40159,6 +40159,1679 @@
|
|
40159 |
"learning_rate": 8.762814700815055e-05,
|
40160 |
"loss": 0.1841,
|
40161 |
"step": 5736
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40162 |
}
|
40163 |
],
|
40164 |
"logging_steps": 1,
|
@@ -40178,7 +41851,7 @@
|
|
40178 |
"attributes": {}
|
40179 |
}
|
40180 |
},
|
40181 |
-
"total_flos": 3.
|
40182 |
"train_batch_size": 4,
|
40183 |
"trial_name": null,
|
40184 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.24159228522041507,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5975,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
40159 |
"learning_rate": 8.762814700815055e-05,
|
40160 |
"loss": 0.1841,
|
40161 |
"step": 5736
|
40162 |
+
},
|
40163 |
+
{
|
40164 |
+
"epoch": 0.23196902766686547,
|
40165 |
+
"grad_norm": 3.6567726135253906,
|
40166 |
+
"learning_rate": 8.76239471108841e-05,
|
40167 |
+
"loss": 0.1751,
|
40168 |
+
"step": 5737
|
40169 |
+
},
|
40170 |
+
{
|
40171 |
+
"epoch": 0.23200946152213248,
|
40172 |
+
"grad_norm": 5.264025688171387,
|
40173 |
+
"learning_rate": 8.761974660154917e-05,
|
40174 |
+
"loss": 0.2108,
|
40175 |
+
"step": 5738
|
40176 |
+
},
|
40177 |
+
{
|
40178 |
+
"epoch": 0.2320498953773995,
|
40179 |
+
"grad_norm": 2.649386167526245,
|
40180 |
+
"learning_rate": 8.76155454802141e-05,
|
40181 |
+
"loss": 0.1209,
|
40182 |
+
"step": 5739
|
40183 |
+
},
|
40184 |
+
{
|
40185 |
+
"epoch": 0.2320903292326665,
|
40186 |
+
"grad_norm": 2.847513198852539,
|
40187 |
+
"learning_rate": 8.761134374694723e-05,
|
40188 |
+
"loss": 0.2378,
|
40189 |
+
"step": 5740
|
40190 |
+
},
|
40191 |
+
{
|
40192 |
+
"epoch": 0.23213076308793354,
|
40193 |
+
"grad_norm": 3.577235698699951,
|
40194 |
+
"learning_rate": 8.760714140181694e-05,
|
40195 |
+
"loss": 0.0858,
|
40196 |
+
"step": 5741
|
40197 |
+
},
|
40198 |
+
{
|
40199 |
+
"epoch": 0.23217119694320054,
|
40200 |
+
"grad_norm": 4.185028553009033,
|
40201 |
+
"learning_rate": 8.760293844489156e-05,
|
40202 |
+
"loss": 0.1261,
|
40203 |
+
"step": 5742
|
40204 |
+
},
|
40205 |
+
{
|
40206 |
+
"epoch": 0.23221163079846754,
|
40207 |
+
"grad_norm": 2.3881616592407227,
|
40208 |
+
"learning_rate": 8.759873487623946e-05,
|
40209 |
+
"loss": 0.1008,
|
40210 |
+
"step": 5743
|
40211 |
+
},
|
40212 |
+
{
|
40213 |
+
"epoch": 0.23225206465373457,
|
40214 |
+
"grad_norm": 4.907158851623535,
|
40215 |
+
"learning_rate": 8.759453069592904e-05,
|
40216 |
+
"loss": 0.2529,
|
40217 |
+
"step": 5744
|
40218 |
+
},
|
40219 |
+
{
|
40220 |
+
"epoch": 0.23229249850900158,
|
40221 |
+
"grad_norm": 8.864971160888672,
|
40222 |
+
"learning_rate": 8.759032590402872e-05,
|
40223 |
+
"loss": 0.2571,
|
40224 |
+
"step": 5745
|
40225 |
+
},
|
40226 |
+
{
|
40227 |
+
"epoch": 0.2323329323642686,
|
40228 |
+
"grad_norm": 8.148448944091797,
|
40229 |
+
"learning_rate": 8.758612050060685e-05,
|
40230 |
+
"loss": 0.3103,
|
40231 |
+
"step": 5746
|
40232 |
+
},
|
40233 |
+
{
|
40234 |
+
"epoch": 0.2323733662195356,
|
40235 |
+
"grad_norm": 3.8654823303222656,
|
40236 |
+
"learning_rate": 8.758191448573188e-05,
|
40237 |
+
"loss": 0.0603,
|
40238 |
+
"step": 5747
|
40239 |
+
},
|
40240 |
+
{
|
40241 |
+
"epoch": 0.23241380007480264,
|
40242 |
+
"grad_norm": 2.587003469467163,
|
40243 |
+
"learning_rate": 8.757770785947221e-05,
|
40244 |
+
"loss": 0.078,
|
40245 |
+
"step": 5748
|
40246 |
+
},
|
40247 |
+
{
|
40248 |
+
"epoch": 0.23245423393006964,
|
40249 |
+
"grad_norm": 3.87430739402771,
|
40250 |
+
"learning_rate": 8.757350062189631e-05,
|
40251 |
+
"loss": 0.0999,
|
40252 |
+
"step": 5749
|
40253 |
+
},
|
40254 |
+
{
|
40255 |
+
"epoch": 0.23249466778533667,
|
40256 |
+
"grad_norm": 13.096192359924316,
|
40257 |
+
"learning_rate": 8.756929277307259e-05,
|
40258 |
+
"loss": 0.3003,
|
40259 |
+
"step": 5750
|
40260 |
+
},
|
40261 |
+
{
|
40262 |
+
"epoch": 0.23253510164060368,
|
40263 |
+
"grad_norm": 2.8238162994384766,
|
40264 |
+
"learning_rate": 8.756508431306951e-05,
|
40265 |
+
"loss": 0.1917,
|
40266 |
+
"step": 5751
|
40267 |
+
},
|
40268 |
+
{
|
40269 |
+
"epoch": 0.2325755354958707,
|
40270 |
+
"grad_norm": 6.673470497131348,
|
40271 |
+
"learning_rate": 8.756087524195555e-05,
|
40272 |
+
"loss": 0.1887,
|
40273 |
+
"step": 5752
|
40274 |
+
},
|
40275 |
+
{
|
40276 |
+
"epoch": 0.2326159693511377,
|
40277 |
+
"grad_norm": 4.917054176330566,
|
40278 |
+
"learning_rate": 8.755666555979915e-05,
|
40279 |
+
"loss": 0.1403,
|
40280 |
+
"step": 5753
|
40281 |
+
},
|
40282 |
+
{
|
40283 |
+
"epoch": 0.2326564032064047,
|
40284 |
+
"grad_norm": 6.934584140777588,
|
40285 |
+
"learning_rate": 8.755245526666884e-05,
|
40286 |
+
"loss": 0.2271,
|
40287 |
+
"step": 5754
|
40288 |
+
},
|
40289 |
+
{
|
40290 |
+
"epoch": 0.23269683706167174,
|
40291 |
+
"grad_norm": 11.554547309875488,
|
40292 |
+
"learning_rate": 8.754824436263307e-05,
|
40293 |
+
"loss": 0.2719,
|
40294 |
+
"step": 5755
|
40295 |
+
},
|
40296 |
+
{
|
40297 |
+
"epoch": 0.23273727091693874,
|
40298 |
+
"grad_norm": 5.0187225341796875,
|
40299 |
+
"learning_rate": 8.754403284776037e-05,
|
40300 |
+
"loss": 0.1601,
|
40301 |
+
"step": 5756
|
40302 |
+
},
|
40303 |
+
{
|
40304 |
+
"epoch": 0.23277770477220577,
|
40305 |
+
"grad_norm": 4.799263954162598,
|
40306 |
+
"learning_rate": 8.753982072211924e-05,
|
40307 |
+
"loss": 0.3102,
|
40308 |
+
"step": 5757
|
40309 |
+
},
|
40310 |
+
{
|
40311 |
+
"epoch": 0.23281813862747278,
|
40312 |
+
"grad_norm": 3.329210042953491,
|
40313 |
+
"learning_rate": 8.753560798577823e-05,
|
40314 |
+
"loss": 0.1409,
|
40315 |
+
"step": 5758
|
40316 |
+
},
|
40317 |
+
{
|
40318 |
+
"epoch": 0.2328585724827398,
|
40319 |
+
"grad_norm": 6.168715953826904,
|
40320 |
+
"learning_rate": 8.753139463880582e-05,
|
40321 |
+
"loss": 0.2019,
|
40322 |
+
"step": 5759
|
40323 |
+
},
|
40324 |
+
{
|
40325 |
+
"epoch": 0.2328990063380068,
|
40326 |
+
"grad_norm": 2.401323080062866,
|
40327 |
+
"learning_rate": 8.75271806812706e-05,
|
40328 |
+
"loss": 0.1743,
|
40329 |
+
"step": 5760
|
40330 |
+
},
|
40331 |
+
{
|
40332 |
+
"epoch": 0.23293944019327384,
|
40333 |
+
"grad_norm": 5.326553821563721,
|
40334 |
+
"learning_rate": 8.75229661132411e-05,
|
40335 |
+
"loss": 0.2354,
|
40336 |
+
"step": 5761
|
40337 |
+
},
|
40338 |
+
{
|
40339 |
+
"epoch": 0.23297987404854084,
|
40340 |
+
"grad_norm": 7.545034408569336,
|
40341 |
+
"learning_rate": 8.751875093478588e-05,
|
40342 |
+
"loss": 0.2768,
|
40343 |
+
"step": 5762
|
40344 |
+
},
|
40345 |
+
{
|
40346 |
+
"epoch": 0.23302030790380787,
|
40347 |
+
"grad_norm": 6.828022003173828,
|
40348 |
+
"learning_rate": 8.751453514597354e-05,
|
40349 |
+
"loss": 0.2114,
|
40350 |
+
"step": 5763
|
40351 |
+
},
|
40352 |
+
{
|
40353 |
+
"epoch": 0.23306074175907487,
|
40354 |
+
"grad_norm": 2.339292049407959,
|
40355 |
+
"learning_rate": 8.751031874687263e-05,
|
40356 |
+
"loss": 0.1785,
|
40357 |
+
"step": 5764
|
40358 |
+
},
|
40359 |
+
{
|
40360 |
+
"epoch": 0.23310117561434188,
|
40361 |
+
"grad_norm": 4.959456920623779,
|
40362 |
+
"learning_rate": 8.750610173755177e-05,
|
40363 |
+
"loss": 0.2792,
|
40364 |
+
"step": 5765
|
40365 |
+
},
|
40366 |
+
{
|
40367 |
+
"epoch": 0.2331416094696089,
|
40368 |
+
"grad_norm": 2.2757108211517334,
|
40369 |
+
"learning_rate": 8.750188411807954e-05,
|
40370 |
+
"loss": 0.26,
|
40371 |
+
"step": 5766
|
40372 |
+
},
|
40373 |
+
{
|
40374 |
+
"epoch": 0.2331820433248759,
|
40375 |
+
"grad_norm": 1.756913661956787,
|
40376 |
+
"learning_rate": 8.749766588852457e-05,
|
40377 |
+
"loss": 0.1486,
|
40378 |
+
"step": 5767
|
40379 |
+
},
|
40380 |
+
{
|
40381 |
+
"epoch": 0.23322247718014294,
|
40382 |
+
"grad_norm": 2.4095423221588135,
|
40383 |
+
"learning_rate": 8.749344704895548e-05,
|
40384 |
+
"loss": 0.1522,
|
40385 |
+
"step": 5768
|
40386 |
+
},
|
40387 |
+
{
|
40388 |
+
"epoch": 0.23326291103540994,
|
40389 |
+
"grad_norm": 3.765080690383911,
|
40390 |
+
"learning_rate": 8.74892275994409e-05,
|
40391 |
+
"loss": 0.2167,
|
40392 |
+
"step": 5769
|
40393 |
+
},
|
40394 |
+
{
|
40395 |
+
"epoch": 0.23330334489067697,
|
40396 |
+
"grad_norm": 6.353774547576904,
|
40397 |
+
"learning_rate": 8.748500754004944e-05,
|
40398 |
+
"loss": 0.2431,
|
40399 |
+
"step": 5770
|
40400 |
+
},
|
40401 |
+
{
|
40402 |
+
"epoch": 0.23334377874594397,
|
40403 |
+
"grad_norm": 5.1684417724609375,
|
40404 |
+
"learning_rate": 8.74807868708498e-05,
|
40405 |
+
"loss": 0.137,
|
40406 |
+
"step": 5771
|
40407 |
+
},
|
40408 |
+
{
|
40409 |
+
"epoch": 0.233384212601211,
|
40410 |
+
"grad_norm": 2.4692533016204834,
|
40411 |
+
"learning_rate": 8.747656559191063e-05,
|
40412 |
+
"loss": 0.1377,
|
40413 |
+
"step": 5772
|
40414 |
+
},
|
40415 |
+
{
|
40416 |
+
"epoch": 0.233424646456478,
|
40417 |
+
"grad_norm": 3.280935525894165,
|
40418 |
+
"learning_rate": 8.747234370330058e-05,
|
40419 |
+
"loss": 0.2056,
|
40420 |
+
"step": 5773
|
40421 |
+
},
|
40422 |
+
{
|
40423 |
+
"epoch": 0.23346508031174504,
|
40424 |
+
"grad_norm": 3.835749864578247,
|
40425 |
+
"learning_rate": 8.746812120508836e-05,
|
40426 |
+
"loss": 0.11,
|
40427 |
+
"step": 5774
|
40428 |
+
},
|
40429 |
+
{
|
40430 |
+
"epoch": 0.23350551416701204,
|
40431 |
+
"grad_norm": 4.708775043487549,
|
40432 |
+
"learning_rate": 8.746389809734266e-05,
|
40433 |
+
"loss": 0.1599,
|
40434 |
+
"step": 5775
|
40435 |
+
},
|
40436 |
+
{
|
40437 |
+
"epoch": 0.23354594802227904,
|
40438 |
+
"grad_norm": 4.239015579223633,
|
40439 |
+
"learning_rate": 8.745967438013215e-05,
|
40440 |
+
"loss": 0.1408,
|
40441 |
+
"step": 5776
|
40442 |
+
},
|
40443 |
+
{
|
40444 |
+
"epoch": 0.23358638187754607,
|
40445 |
+
"grad_norm": 4.3496413230896,
|
40446 |
+
"learning_rate": 8.745545005352557e-05,
|
40447 |
+
"loss": 0.1207,
|
40448 |
+
"step": 5777
|
40449 |
+
},
|
40450 |
+
{
|
40451 |
+
"epoch": 0.23362681573281308,
|
40452 |
+
"grad_norm": 3.9284868240356445,
|
40453 |
+
"learning_rate": 8.745122511759163e-05,
|
40454 |
+
"loss": 0.1584,
|
40455 |
+
"step": 5778
|
40456 |
+
},
|
40457 |
+
{
|
40458 |
+
"epoch": 0.2336672495880801,
|
40459 |
+
"grad_norm": 5.885026931762695,
|
40460 |
+
"learning_rate": 8.744699957239908e-05,
|
40461 |
+
"loss": 0.198,
|
40462 |
+
"step": 5779
|
40463 |
+
},
|
40464 |
+
{
|
40465 |
+
"epoch": 0.2337076834433471,
|
40466 |
+
"grad_norm": 2.9485673904418945,
|
40467 |
+
"learning_rate": 8.744277341801664e-05,
|
40468 |
+
"loss": 0.2196,
|
40469 |
+
"step": 5780
|
40470 |
+
},
|
40471 |
+
{
|
40472 |
+
"epoch": 0.23374811729861414,
|
40473 |
+
"grad_norm": 7.390035629272461,
|
40474 |
+
"learning_rate": 8.743854665451305e-05,
|
40475 |
+
"loss": 0.2785,
|
40476 |
+
"step": 5781
|
40477 |
+
},
|
40478 |
+
{
|
40479 |
+
"epoch": 0.23378855115388114,
|
40480 |
+
"grad_norm": 3.329568386077881,
|
40481 |
+
"learning_rate": 8.74343192819571e-05,
|
40482 |
+
"loss": 0.2456,
|
40483 |
+
"step": 5782
|
40484 |
+
},
|
40485 |
+
{
|
40486 |
+
"epoch": 0.23382898500914817,
|
40487 |
+
"grad_norm": 3.300050735473633,
|
40488 |
+
"learning_rate": 8.743009130041757e-05,
|
40489 |
+
"loss": 0.11,
|
40490 |
+
"step": 5783
|
40491 |
+
},
|
40492 |
+
{
|
40493 |
+
"epoch": 0.23386941886441517,
|
40494 |
+
"grad_norm": 6.936415672302246,
|
40495 |
+
"learning_rate": 8.74258627099632e-05,
|
40496 |
+
"loss": 0.2326,
|
40497 |
+
"step": 5784
|
40498 |
+
},
|
40499 |
+
{
|
40500 |
+
"epoch": 0.23390985271968218,
|
40501 |
+
"grad_norm": 3.382483720779419,
|
40502 |
+
"learning_rate": 8.742163351066281e-05,
|
40503 |
+
"loss": 0.1037,
|
40504 |
+
"step": 5785
|
40505 |
+
},
|
40506 |
+
{
|
40507 |
+
"epoch": 0.2339502865749492,
|
40508 |
+
"grad_norm": 3.018651247024536,
|
40509 |
+
"learning_rate": 8.74174037025852e-05,
|
40510 |
+
"loss": 0.0982,
|
40511 |
+
"step": 5786
|
40512 |
+
},
|
40513 |
+
{
|
40514 |
+
"epoch": 0.2339907204302162,
|
40515 |
+
"grad_norm": 3.692439079284668,
|
40516 |
+
"learning_rate": 8.741317328579917e-05,
|
40517 |
+
"loss": 0.1302,
|
40518 |
+
"step": 5787
|
40519 |
+
},
|
40520 |
+
{
|
40521 |
+
"epoch": 0.23403115428548324,
|
40522 |
+
"grad_norm": 2.0556282997131348,
|
40523 |
+
"learning_rate": 8.740894226037355e-05,
|
40524 |
+
"loss": 0.0681,
|
40525 |
+
"step": 5788
|
40526 |
+
},
|
40527 |
+
{
|
40528 |
+
"epoch": 0.23407158814075024,
|
40529 |
+
"grad_norm": 4.005192756652832,
|
40530 |
+
"learning_rate": 8.740471062637717e-05,
|
40531 |
+
"loss": 0.1894,
|
40532 |
+
"step": 5789
|
40533 |
+
},
|
40534 |
+
{
|
40535 |
+
"epoch": 0.23411202199601727,
|
40536 |
+
"grad_norm": 4.588472843170166,
|
40537 |
+
"learning_rate": 8.740047838387885e-05,
|
40538 |
+
"loss": 0.2187,
|
40539 |
+
"step": 5790
|
40540 |
+
},
|
40541 |
+
{
|
40542 |
+
"epoch": 0.23415245585128427,
|
40543 |
+
"grad_norm": 4.5771803855896,
|
40544 |
+
"learning_rate": 8.739624553294746e-05,
|
40545 |
+
"loss": 0.2587,
|
40546 |
+
"step": 5791
|
40547 |
+
},
|
40548 |
+
{
|
40549 |
+
"epoch": 0.2341928897065513,
|
40550 |
+
"grad_norm": 6.830857276916504,
|
40551 |
+
"learning_rate": 8.739201207365186e-05,
|
40552 |
+
"loss": 0.2328,
|
40553 |
+
"step": 5792
|
40554 |
+
},
|
40555 |
+
{
|
40556 |
+
"epoch": 0.2342333235618183,
|
40557 |
+
"grad_norm": 4.287374019622803,
|
40558 |
+
"learning_rate": 8.738777800606092e-05,
|
40559 |
+
"loss": 0.1453,
|
40560 |
+
"step": 5793
|
40561 |
+
},
|
40562 |
+
{
|
40563 |
+
"epoch": 0.23427375741708534,
|
40564 |
+
"grad_norm": 10.03058910369873,
|
40565 |
+
"learning_rate": 8.738354333024351e-05,
|
40566 |
+
"loss": 0.2978,
|
40567 |
+
"step": 5794
|
40568 |
+
},
|
40569 |
+
{
|
40570 |
+
"epoch": 0.23431419127235234,
|
40571 |
+
"grad_norm": 6.294463634490967,
|
40572 |
+
"learning_rate": 8.737930804626852e-05,
|
40573 |
+
"loss": 0.173,
|
40574 |
+
"step": 5795
|
40575 |
+
},
|
40576 |
+
{
|
40577 |
+
"epoch": 0.23435462512761934,
|
40578 |
+
"grad_norm": 5.071019172668457,
|
40579 |
+
"learning_rate": 8.737507215420489e-05,
|
40580 |
+
"loss": 0.1651,
|
40581 |
+
"step": 5796
|
40582 |
+
},
|
40583 |
+
{
|
40584 |
+
"epoch": 0.23439505898288637,
|
40585 |
+
"grad_norm": 3.06192684173584,
|
40586 |
+
"learning_rate": 8.737083565412146e-05,
|
40587 |
+
"loss": 0.1349,
|
40588 |
+
"step": 5797
|
40589 |
+
},
|
40590 |
+
{
|
40591 |
+
"epoch": 0.23443549283815338,
|
40592 |
+
"grad_norm": 4.775763034820557,
|
40593 |
+
"learning_rate": 8.736659854608721e-05,
|
40594 |
+
"loss": 0.115,
|
40595 |
+
"step": 5798
|
40596 |
+
},
|
40597 |
+
{
|
40598 |
+
"epoch": 0.2344759266934204,
|
40599 |
+
"grad_norm": 2.225261688232422,
|
40600 |
+
"learning_rate": 8.736236083017105e-05,
|
40601 |
+
"loss": 0.1293,
|
40602 |
+
"step": 5799
|
40603 |
+
},
|
40604 |
+
{
|
40605 |
+
"epoch": 0.2345163605486874,
|
40606 |
+
"grad_norm": 4.4199347496032715,
|
40607 |
+
"learning_rate": 8.735812250644188e-05,
|
40608 |
+
"loss": 0.1832,
|
40609 |
+
"step": 5800
|
40610 |
+
},
|
40611 |
+
{
|
40612 |
+
"epoch": 0.23455679440395444,
|
40613 |
+
"grad_norm": 5.317866802215576,
|
40614 |
+
"learning_rate": 8.735388357496871e-05,
|
40615 |
+
"loss": 0.2008,
|
40616 |
+
"step": 5801
|
40617 |
+
},
|
40618 |
+
{
|
40619 |
+
"epoch": 0.23459722825922144,
|
40620 |
+
"grad_norm": 2.7359652519226074,
|
40621 |
+
"learning_rate": 8.734964403582046e-05,
|
40622 |
+
"loss": 0.2234,
|
40623 |
+
"step": 5802
|
40624 |
+
},
|
40625 |
+
{
|
40626 |
+
"epoch": 0.23463766211448847,
|
40627 |
+
"grad_norm": 8.821404457092285,
|
40628 |
+
"learning_rate": 8.734540388906614e-05,
|
40629 |
+
"loss": 0.3094,
|
40630 |
+
"step": 5803
|
40631 |
+
},
|
40632 |
+
{
|
40633 |
+
"epoch": 0.23467809596975547,
|
40634 |
+
"grad_norm": 6.769132614135742,
|
40635 |
+
"learning_rate": 8.734116313477467e-05,
|
40636 |
+
"loss": 0.1578,
|
40637 |
+
"step": 5804
|
40638 |
+
},
|
40639 |
+
{
|
40640 |
+
"epoch": 0.2347185298250225,
|
40641 |
+
"grad_norm": 6.217795372009277,
|
40642 |
+
"learning_rate": 8.733692177301509e-05,
|
40643 |
+
"loss": 0.2463,
|
40644 |
+
"step": 5805
|
40645 |
+
},
|
40646 |
+
{
|
40647 |
+
"epoch": 0.2347589636802895,
|
40648 |
+
"grad_norm": 2.9610724449157715,
|
40649 |
+
"learning_rate": 8.733267980385638e-05,
|
40650 |
+
"loss": 0.1591,
|
40651 |
+
"step": 5806
|
40652 |
+
},
|
40653 |
+
{
|
40654 |
+
"epoch": 0.2347993975355565,
|
40655 |
+
"grad_norm": 3.7270467281341553,
|
40656 |
+
"learning_rate": 8.732843722736753e-05,
|
40657 |
+
"loss": 0.2956,
|
40658 |
+
"step": 5807
|
40659 |
+
},
|
40660 |
+
{
|
40661 |
+
"epoch": 0.23483983139082354,
|
40662 |
+
"grad_norm": 4.4273271560668945,
|
40663 |
+
"learning_rate": 8.732419404361758e-05,
|
40664 |
+
"loss": 0.1765,
|
40665 |
+
"step": 5808
|
40666 |
+
},
|
40667 |
+
{
|
40668 |
+
"epoch": 0.23488026524609054,
|
40669 |
+
"grad_norm": 3.3255577087402344,
|
40670 |
+
"learning_rate": 8.731995025267556e-05,
|
40671 |
+
"loss": 0.1365,
|
40672 |
+
"step": 5809
|
40673 |
+
},
|
40674 |
+
{
|
40675 |
+
"epoch": 0.23492069910135757,
|
40676 |
+
"grad_norm": 8.559576988220215,
|
40677 |
+
"learning_rate": 8.73157058546105e-05,
|
40678 |
+
"loss": 0.1813,
|
40679 |
+
"step": 5810
|
40680 |
+
},
|
40681 |
+
{
|
40682 |
+
"epoch": 0.23496113295662457,
|
40683 |
+
"grad_norm": 4.305734157562256,
|
40684 |
+
"learning_rate": 8.731146084949147e-05,
|
40685 |
+
"loss": 0.1383,
|
40686 |
+
"step": 5811
|
40687 |
+
},
|
40688 |
+
{
|
40689 |
+
"epoch": 0.2350015668118916,
|
40690 |
+
"grad_norm": 3.887387990951538,
|
40691 |
+
"learning_rate": 8.730721523738749e-05,
|
40692 |
+
"loss": 0.1957,
|
40693 |
+
"step": 5812
|
40694 |
+
},
|
40695 |
+
{
|
40696 |
+
"epoch": 0.2350420006671586,
|
40697 |
+
"grad_norm": 2.232271194458008,
|
40698 |
+
"learning_rate": 8.730296901836765e-05,
|
40699 |
+
"loss": 0.1972,
|
40700 |
+
"step": 5813
|
40701 |
+
},
|
40702 |
+
{
|
40703 |
+
"epoch": 0.23508243452242564,
|
40704 |
+
"grad_norm": 4.348437786102295,
|
40705 |
+
"learning_rate": 8.729872219250102e-05,
|
40706 |
+
"loss": 0.1786,
|
40707 |
+
"step": 5814
|
40708 |
+
},
|
40709 |
+
{
|
40710 |
+
"epoch": 0.23512286837769264,
|
40711 |
+
"grad_norm": 2.5670852661132812,
|
40712 |
+
"learning_rate": 8.729447475985671e-05,
|
40713 |
+
"loss": 0.2047,
|
40714 |
+
"step": 5815
|
40715 |
+
},
|
40716 |
+
{
|
40717 |
+
"epoch": 0.23516330223295967,
|
40718 |
+
"grad_norm": 1.968658447265625,
|
40719 |
+
"learning_rate": 8.729022672050378e-05,
|
40720 |
+
"loss": 0.1085,
|
40721 |
+
"step": 5816
|
40722 |
+
},
|
40723 |
+
{
|
40724 |
+
"epoch": 0.23520373608822667,
|
40725 |
+
"grad_norm": 1.6469684839248657,
|
40726 |
+
"learning_rate": 8.728597807451136e-05,
|
40727 |
+
"loss": 0.0815,
|
40728 |
+
"step": 5817
|
40729 |
+
},
|
40730 |
+
{
|
40731 |
+
"epoch": 0.23524416994349368,
|
40732 |
+
"grad_norm": 1.3831133842468262,
|
40733 |
+
"learning_rate": 8.728172882194856e-05,
|
40734 |
+
"loss": 0.0523,
|
40735 |
+
"step": 5818
|
40736 |
+
},
|
40737 |
+
{
|
40738 |
+
"epoch": 0.2352846037987607,
|
40739 |
+
"grad_norm": 3.7314982414245605,
|
40740 |
+
"learning_rate": 8.727747896288452e-05,
|
40741 |
+
"loss": 0.1326,
|
40742 |
+
"step": 5819
|
40743 |
+
},
|
40744 |
+
{
|
40745 |
+
"epoch": 0.2353250376540277,
|
40746 |
+
"grad_norm": 3.7328240871429443,
|
40747 |
+
"learning_rate": 8.727322849738837e-05,
|
40748 |
+
"loss": 0.1417,
|
40749 |
+
"step": 5820
|
40750 |
+
},
|
40751 |
+
{
|
40752 |
+
"epoch": 0.23536547150929474,
|
40753 |
+
"grad_norm": 2.3268916606903076,
|
40754 |
+
"learning_rate": 8.726897742552925e-05,
|
40755 |
+
"loss": 0.12,
|
40756 |
+
"step": 5821
|
40757 |
+
},
|
40758 |
+
{
|
40759 |
+
"epoch": 0.23540590536456174,
|
40760 |
+
"grad_norm": 4.0223894119262695,
|
40761 |
+
"learning_rate": 8.726472574737632e-05,
|
40762 |
+
"loss": 0.1849,
|
40763 |
+
"step": 5822
|
40764 |
+
},
|
40765 |
+
{
|
40766 |
+
"epoch": 0.23544633921982877,
|
40767 |
+
"grad_norm": 7.2805094718933105,
|
40768 |
+
"learning_rate": 8.726047346299875e-05,
|
40769 |
+
"loss": 0.1529,
|
40770 |
+
"step": 5823
|
40771 |
+
},
|
40772 |
+
{
|
40773 |
+
"epoch": 0.23548677307509577,
|
40774 |
+
"grad_norm": 5.168961048126221,
|
40775 |
+
"learning_rate": 8.725622057246571e-05,
|
40776 |
+
"loss": 0.1119,
|
40777 |
+
"step": 5824
|
40778 |
+
},
|
40779 |
+
{
|
40780 |
+
"epoch": 0.2355272069303628,
|
40781 |
+
"grad_norm": 5.309213161468506,
|
40782 |
+
"learning_rate": 8.72519670758464e-05,
|
40783 |
+
"loss": 0.267,
|
40784 |
+
"step": 5825
|
40785 |
+
},
|
40786 |
+
{
|
40787 |
+
"epoch": 0.2355676407856298,
|
40788 |
+
"grad_norm": 6.623654365539551,
|
40789 |
+
"learning_rate": 8.724771297321e-05,
|
40790 |
+
"loss": 0.2157,
|
40791 |
+
"step": 5826
|
40792 |
+
},
|
40793 |
+
{
|
40794 |
+
"epoch": 0.23560807464089684,
|
40795 |
+
"grad_norm": 5.449578285217285,
|
40796 |
+
"learning_rate": 8.724345826462573e-05,
|
40797 |
+
"loss": 0.2572,
|
40798 |
+
"step": 5827
|
40799 |
+
},
|
40800 |
+
{
|
40801 |
+
"epoch": 0.23564850849616384,
|
40802 |
+
"grad_norm": 6.319265842437744,
|
40803 |
+
"learning_rate": 8.723920295016279e-05,
|
40804 |
+
"loss": 0.1908,
|
40805 |
+
"step": 5828
|
40806 |
+
},
|
40807 |
+
{
|
40808 |
+
"epoch": 0.23568894235143084,
|
40809 |
+
"grad_norm": 3.596027374267578,
|
40810 |
+
"learning_rate": 8.72349470298904e-05,
|
40811 |
+
"loss": 0.1538,
|
40812 |
+
"step": 5829
|
40813 |
+
},
|
40814 |
+
{
|
40815 |
+
"epoch": 0.23572937620669787,
|
40816 |
+
"grad_norm": 6.945806980133057,
|
40817 |
+
"learning_rate": 8.723069050387785e-05,
|
40818 |
+
"loss": 0.1906,
|
40819 |
+
"step": 5830
|
40820 |
+
},
|
40821 |
+
{
|
40822 |
+
"epoch": 0.23576981006196487,
|
40823 |
+
"grad_norm": 3.5267140865325928,
|
40824 |
+
"learning_rate": 8.722643337219431e-05,
|
40825 |
+
"loss": 0.151,
|
40826 |
+
"step": 5831
|
40827 |
+
},
|
40828 |
+
{
|
40829 |
+
"epoch": 0.2358102439172319,
|
40830 |
+
"grad_norm": 4.481578350067139,
|
40831 |
+
"learning_rate": 8.722217563490908e-05,
|
40832 |
+
"loss": 0.1609,
|
40833 |
+
"step": 5832
|
40834 |
+
},
|
40835 |
+
{
|
40836 |
+
"epoch": 0.2358506777724989,
|
40837 |
+
"grad_norm": 4.303544044494629,
|
40838 |
+
"learning_rate": 8.721791729209142e-05,
|
40839 |
+
"loss": 0.1594,
|
40840 |
+
"step": 5833
|
40841 |
+
},
|
40842 |
+
{
|
40843 |
+
"epoch": 0.23589111162776594,
|
40844 |
+
"grad_norm": 8.424549102783203,
|
40845 |
+
"learning_rate": 8.72136583438106e-05,
|
40846 |
+
"loss": 0.2806,
|
40847 |
+
"step": 5834
|
40848 |
+
},
|
40849 |
+
{
|
40850 |
+
"epoch": 0.23593154548303294,
|
40851 |
+
"grad_norm": 3.254166603088379,
|
40852 |
+
"learning_rate": 8.72093987901359e-05,
|
40853 |
+
"loss": 0.1041,
|
40854 |
+
"step": 5835
|
40855 |
+
},
|
40856 |
+
{
|
40857 |
+
"epoch": 0.23597197933829997,
|
40858 |
+
"grad_norm": 2.9343652725219727,
|
40859 |
+
"learning_rate": 8.720513863113663e-05,
|
40860 |
+
"loss": 0.1437,
|
40861 |
+
"step": 5836
|
40862 |
+
},
|
40863 |
+
{
|
40864 |
+
"epoch": 0.23601241319356697,
|
40865 |
+
"grad_norm": 4.241241931915283,
|
40866 |
+
"learning_rate": 8.720087786688207e-05,
|
40867 |
+
"loss": 0.1908,
|
40868 |
+
"step": 5837
|
40869 |
+
},
|
40870 |
+
{
|
40871 |
+
"epoch": 0.236052847048834,
|
40872 |
+
"grad_norm": 4.533597469329834,
|
40873 |
+
"learning_rate": 8.719661649744157e-05,
|
40874 |
+
"loss": 0.1449,
|
40875 |
+
"step": 5838
|
40876 |
+
},
|
40877 |
+
{
|
40878 |
+
"epoch": 0.236093280904101,
|
40879 |
+
"grad_norm": 6.601241111755371,
|
40880 |
+
"learning_rate": 8.719235452288441e-05,
|
40881 |
+
"loss": 0.2474,
|
40882 |
+
"step": 5839
|
40883 |
+
},
|
40884 |
+
{
|
40885 |
+
"epoch": 0.236133714759368,
|
40886 |
+
"grad_norm": 5.3936614990234375,
|
40887 |
+
"learning_rate": 8.718809194327997e-05,
|
40888 |
+
"loss": 0.1519,
|
40889 |
+
"step": 5840
|
40890 |
+
},
|
40891 |
+
{
|
40892 |
+
"epoch": 0.23617414861463504,
|
40893 |
+
"grad_norm": 6.60890531539917,
|
40894 |
+
"learning_rate": 8.718382875869756e-05,
|
40895 |
+
"loss": 0.3376,
|
40896 |
+
"step": 5841
|
40897 |
+
},
|
40898 |
+
{
|
40899 |
+
"epoch": 0.23621458246990204,
|
40900 |
+
"grad_norm": 2.936448812484741,
|
40901 |
+
"learning_rate": 8.717956496920654e-05,
|
40902 |
+
"loss": 0.1402,
|
40903 |
+
"step": 5842
|
40904 |
+
},
|
40905 |
+
{
|
40906 |
+
"epoch": 0.23625501632516907,
|
40907 |
+
"grad_norm": 5.989627361297607,
|
40908 |
+
"learning_rate": 8.717530057487629e-05,
|
40909 |
+
"loss": 0.2525,
|
40910 |
+
"step": 5843
|
40911 |
+
},
|
40912 |
+
{
|
40913 |
+
"epoch": 0.23629545018043607,
|
40914 |
+
"grad_norm": 4.293125152587891,
|
40915 |
+
"learning_rate": 8.717103557577615e-05,
|
40916 |
+
"loss": 0.1998,
|
40917 |
+
"step": 5844
|
40918 |
+
},
|
40919 |
+
{
|
40920 |
+
"epoch": 0.2363358840357031,
|
40921 |
+
"grad_norm": 6.075552940368652,
|
40922 |
+
"learning_rate": 8.716676997197555e-05,
|
40923 |
+
"loss": 0.2591,
|
40924 |
+
"step": 5845
|
40925 |
+
},
|
40926 |
+
{
|
40927 |
+
"epoch": 0.2363763178909701,
|
40928 |
+
"grad_norm": 5.427404880523682,
|
40929 |
+
"learning_rate": 8.716250376354385e-05,
|
40930 |
+
"loss": 0.0835,
|
40931 |
+
"step": 5846
|
40932 |
+
},
|
40933 |
+
{
|
40934 |
+
"epoch": 0.23641675174623714,
|
40935 |
+
"grad_norm": 6.130009174346924,
|
40936 |
+
"learning_rate": 8.715823695055046e-05,
|
40937 |
+
"loss": 0.2353,
|
40938 |
+
"step": 5847
|
40939 |
+
},
|
40940 |
+
{
|
40941 |
+
"epoch": 0.23645718560150414,
|
40942 |
+
"grad_norm": 4.348407745361328,
|
40943 |
+
"learning_rate": 8.715396953306478e-05,
|
40944 |
+
"loss": 0.267,
|
40945 |
+
"step": 5848
|
40946 |
+
},
|
40947 |
+
{
|
40948 |
+
"epoch": 0.23649761945677114,
|
40949 |
+
"grad_norm": 7.366415500640869,
|
40950 |
+
"learning_rate": 8.714970151115625e-05,
|
40951 |
+
"loss": 0.1881,
|
40952 |
+
"step": 5849
|
40953 |
+
},
|
40954 |
+
{
|
40955 |
+
"epoch": 0.23653805331203817,
|
40956 |
+
"grad_norm": 3.2398664951324463,
|
40957 |
+
"learning_rate": 8.714543288489432e-05,
|
40958 |
+
"loss": 0.1139,
|
40959 |
+
"step": 5850
|
40960 |
+
},
|
40961 |
+
{
|
40962 |
+
"epoch": 0.23657848716730517,
|
40963 |
+
"grad_norm": 5.649832248687744,
|
40964 |
+
"learning_rate": 8.714116365434839e-05,
|
40965 |
+
"loss": 0.2578,
|
40966 |
+
"step": 5851
|
40967 |
+
},
|
40968 |
+
{
|
40969 |
+
"epoch": 0.2366189210225722,
|
40970 |
+
"grad_norm": 2.6740245819091797,
|
40971 |
+
"learning_rate": 8.713689381958795e-05,
|
40972 |
+
"loss": 0.1679,
|
40973 |
+
"step": 5852
|
40974 |
+
},
|
40975 |
+
{
|
40976 |
+
"epoch": 0.2366593548778392,
|
40977 |
+
"grad_norm": 3.967548131942749,
|
40978 |
+
"learning_rate": 8.713262338068243e-05,
|
40979 |
+
"loss": 0.1872,
|
40980 |
+
"step": 5853
|
40981 |
+
},
|
40982 |
+
{
|
40983 |
+
"epoch": 0.23669978873310624,
|
40984 |
+
"grad_norm": 4.226831912994385,
|
40985 |
+
"learning_rate": 8.712835233770133e-05,
|
40986 |
+
"loss": 0.1886,
|
40987 |
+
"step": 5854
|
40988 |
+
},
|
40989 |
+
{
|
40990 |
+
"epoch": 0.23674022258837324,
|
40991 |
+
"grad_norm": 5.640081405639648,
|
40992 |
+
"learning_rate": 8.712408069071411e-05,
|
40993 |
+
"loss": 0.2116,
|
40994 |
+
"step": 5855
|
40995 |
+
},
|
40996 |
+
{
|
40997 |
+
"epoch": 0.23678065644364027,
|
40998 |
+
"grad_norm": 2.474032402038574,
|
40999 |
+
"learning_rate": 8.711980843979027e-05,
|
41000 |
+
"loss": 0.1502,
|
41001 |
+
"step": 5856
|
41002 |
+
},
|
41003 |
+
{
|
41004 |
+
"epoch": 0.23682109029890727,
|
41005 |
+
"grad_norm": 3.420659303665161,
|
41006 |
+
"learning_rate": 8.711553558499929e-05,
|
41007 |
+
"loss": 0.2218,
|
41008 |
+
"step": 5857
|
41009 |
+
},
|
41010 |
+
{
|
41011 |
+
"epoch": 0.2368615241541743,
|
41012 |
+
"grad_norm": 4.594886302947998,
|
41013 |
+
"learning_rate": 8.711126212641075e-05,
|
41014 |
+
"loss": 0.1995,
|
41015 |
+
"step": 5858
|
41016 |
+
},
|
41017 |
+
{
|
41018 |
+
"epoch": 0.2369019580094413,
|
41019 |
+
"grad_norm": 6.94998836517334,
|
41020 |
+
"learning_rate": 8.710698806409409e-05,
|
41021 |
+
"loss": 0.2613,
|
41022 |
+
"step": 5859
|
41023 |
+
},
|
41024 |
+
{
|
41025 |
+
"epoch": 0.2369423918647083,
|
41026 |
+
"grad_norm": 3.810699224472046,
|
41027 |
+
"learning_rate": 8.710271339811888e-05,
|
41028 |
+
"loss": 0.2414,
|
41029 |
+
"step": 5860
|
41030 |
+
},
|
41031 |
+
{
|
41032 |
+
"epoch": 0.23698282571997534,
|
41033 |
+
"grad_norm": 7.499741554260254,
|
41034 |
+
"learning_rate": 8.709843812855465e-05,
|
41035 |
+
"loss": 0.1937,
|
41036 |
+
"step": 5861
|
41037 |
+
},
|
41038 |
+
{
|
41039 |
+
"epoch": 0.23702325957524234,
|
41040 |
+
"grad_norm": 4.931144714355469,
|
41041 |
+
"learning_rate": 8.709416225547096e-05,
|
41042 |
+
"loss": 0.128,
|
41043 |
+
"step": 5862
|
41044 |
+
},
|
41045 |
+
{
|
41046 |
+
"epoch": 0.23706369343050937,
|
41047 |
+
"grad_norm": 2.9475507736206055,
|
41048 |
+
"learning_rate": 8.708988577893736e-05,
|
41049 |
+
"loss": 0.2139,
|
41050 |
+
"step": 5863
|
41051 |
+
},
|
41052 |
+
{
|
41053 |
+
"epoch": 0.23710412728577637,
|
41054 |
+
"grad_norm": 3.591439723968506,
|
41055 |
+
"learning_rate": 8.708560869902342e-05,
|
41056 |
+
"loss": 0.2295,
|
41057 |
+
"step": 5864
|
41058 |
+
},
|
41059 |
+
{
|
41060 |
+
"epoch": 0.2371445611410434,
|
41061 |
+
"grad_norm": 4.529487609863281,
|
41062 |
+
"learning_rate": 8.708133101579874e-05,
|
41063 |
+
"loss": 0.1935,
|
41064 |
+
"step": 5865
|
41065 |
+
},
|
41066 |
+
{
|
41067 |
+
"epoch": 0.2371849949963104,
|
41068 |
+
"grad_norm": 6.102591037750244,
|
41069 |
+
"learning_rate": 8.707705272933288e-05,
|
41070 |
+
"loss": 0.1877,
|
41071 |
+
"step": 5866
|
41072 |
+
},
|
41073 |
+
{
|
41074 |
+
"epoch": 0.23722542885157744,
|
41075 |
+
"grad_norm": 4.529348373413086,
|
41076 |
+
"learning_rate": 8.707277383969546e-05,
|
41077 |
+
"loss": 0.1789,
|
41078 |
+
"step": 5867
|
41079 |
+
},
|
41080 |
+
{
|
41081 |
+
"epoch": 0.23726586270684444,
|
41082 |
+
"grad_norm": 5.818760871887207,
|
41083 |
+
"learning_rate": 8.706849434695607e-05,
|
41084 |
+
"loss": 0.2781,
|
41085 |
+
"step": 5868
|
41086 |
+
},
|
41087 |
+
{
|
41088 |
+
"epoch": 0.23730629656211147,
|
41089 |
+
"grad_norm": 4.015359401702881,
|
41090 |
+
"learning_rate": 8.706421425118436e-05,
|
41091 |
+
"loss": 0.1195,
|
41092 |
+
"step": 5869
|
41093 |
+
},
|
41094 |
+
{
|
41095 |
+
"epoch": 0.23734673041737847,
|
41096 |
+
"grad_norm": 2.948803663253784,
|
41097 |
+
"learning_rate": 8.705993355244994e-05,
|
41098 |
+
"loss": 0.1357,
|
41099 |
+
"step": 5870
|
41100 |
+
},
|
41101 |
+
{
|
41102 |
+
"epoch": 0.23738716427264547,
|
41103 |
+
"grad_norm": 5.522337913513184,
|
41104 |
+
"learning_rate": 8.705565225082244e-05,
|
41105 |
+
"loss": 0.1231,
|
41106 |
+
"step": 5871
|
41107 |
+
},
|
41108 |
+
{
|
41109 |
+
"epoch": 0.2374275981279125,
|
41110 |
+
"grad_norm": 2.600602865219116,
|
41111 |
+
"learning_rate": 8.705137034637152e-05,
|
41112 |
+
"loss": 0.1913,
|
41113 |
+
"step": 5872
|
41114 |
+
},
|
41115 |
+
{
|
41116 |
+
"epoch": 0.2374680319831795,
|
41117 |
+
"grad_norm": 5.414295673370361,
|
41118 |
+
"learning_rate": 8.704708783916683e-05,
|
41119 |
+
"loss": 0.1661,
|
41120 |
+
"step": 5873
|
41121 |
+
},
|
41122 |
+
{
|
41123 |
+
"epoch": 0.23750846583844654,
|
41124 |
+
"grad_norm": 3.5818450450897217,
|
41125 |
+
"learning_rate": 8.704280472927804e-05,
|
41126 |
+
"loss": 0.1705,
|
41127 |
+
"step": 5874
|
41128 |
+
},
|
41129 |
+
{
|
41130 |
+
"epoch": 0.23754889969371354,
|
41131 |
+
"grad_norm": 2.1138157844543457,
|
41132 |
+
"learning_rate": 8.703852101677484e-05,
|
41133 |
+
"loss": 0.1061,
|
41134 |
+
"step": 5875
|
41135 |
+
},
|
41136 |
+
{
|
41137 |
+
"epoch": 0.23758933354898057,
|
41138 |
+
"grad_norm": 3.862276792526245,
|
41139 |
+
"learning_rate": 8.703423670172691e-05,
|
41140 |
+
"loss": 0.0831,
|
41141 |
+
"step": 5876
|
41142 |
+
},
|
41143 |
+
{
|
41144 |
+
"epoch": 0.23762976740424757,
|
41145 |
+
"grad_norm": 3.2862191200256348,
|
41146 |
+
"learning_rate": 8.702995178420395e-05,
|
41147 |
+
"loss": 0.193,
|
41148 |
+
"step": 5877
|
41149 |
+
},
|
41150 |
+
{
|
41151 |
+
"epoch": 0.2376702012595146,
|
41152 |
+
"grad_norm": 2.788506269454956,
|
41153 |
+
"learning_rate": 8.702566626427566e-05,
|
41154 |
+
"loss": 0.1478,
|
41155 |
+
"step": 5878
|
41156 |
+
},
|
41157 |
+
{
|
41158 |
+
"epoch": 0.2377106351147816,
|
41159 |
+
"grad_norm": 4.104318141937256,
|
41160 |
+
"learning_rate": 8.702138014201176e-05,
|
41161 |
+
"loss": 0.1211,
|
41162 |
+
"step": 5879
|
41163 |
+
},
|
41164 |
+
{
|
41165 |
+
"epoch": 0.23775106897004863,
|
41166 |
+
"grad_norm": 4.658287048339844,
|
41167 |
+
"learning_rate": 8.701709341748197e-05,
|
41168 |
+
"loss": 0.1015,
|
41169 |
+
"step": 5880
|
41170 |
+
},
|
41171 |
+
{
|
41172 |
+
"epoch": 0.23779150282531564,
|
41173 |
+
"grad_norm": 6.327422618865967,
|
41174 |
+
"learning_rate": 8.701280609075605e-05,
|
41175 |
+
"loss": 0.2001,
|
41176 |
+
"step": 5881
|
41177 |
+
},
|
41178 |
+
{
|
41179 |
+
"epoch": 0.23783193668058264,
|
41180 |
+
"grad_norm": 3.2962646484375,
|
41181 |
+
"learning_rate": 8.700851816190372e-05,
|
41182 |
+
"loss": 0.1955,
|
41183 |
+
"step": 5882
|
41184 |
+
},
|
41185 |
+
{
|
41186 |
+
"epoch": 0.23787237053584967,
|
41187 |
+
"grad_norm": 2.978949785232544,
|
41188 |
+
"learning_rate": 8.700422963099474e-05,
|
41189 |
+
"loss": 0.2011,
|
41190 |
+
"step": 5883
|
41191 |
+
},
|
41192 |
+
{
|
41193 |
+
"epoch": 0.23791280439111667,
|
41194 |
+
"grad_norm": 9.541155815124512,
|
41195 |
+
"learning_rate": 8.699994049809891e-05,
|
41196 |
+
"loss": 0.1872,
|
41197 |
+
"step": 5884
|
41198 |
+
},
|
41199 |
+
{
|
41200 |
+
"epoch": 0.2379532382463837,
|
41201 |
+
"grad_norm": 4.963170051574707,
|
41202 |
+
"learning_rate": 8.699565076328597e-05,
|
41203 |
+
"loss": 0.1111,
|
41204 |
+
"step": 5885
|
41205 |
+
},
|
41206 |
+
{
|
41207 |
+
"epoch": 0.2379936721016507,
|
41208 |
+
"grad_norm": 4.113926410675049,
|
41209 |
+
"learning_rate": 8.69913604266257e-05,
|
41210 |
+
"loss": 0.1011,
|
41211 |
+
"step": 5886
|
41212 |
+
},
|
41213 |
+
{
|
41214 |
+
"epoch": 0.23803410595691774,
|
41215 |
+
"grad_norm": 6.18961763381958,
|
41216 |
+
"learning_rate": 8.698706948818791e-05,
|
41217 |
+
"loss": 0.2459,
|
41218 |
+
"step": 5887
|
41219 |
+
},
|
41220 |
+
{
|
41221 |
+
"epoch": 0.23807453981218474,
|
41222 |
+
"grad_norm": 3.1595184803009033,
|
41223 |
+
"learning_rate": 8.698277794804243e-05,
|
41224 |
+
"loss": 0.1329,
|
41225 |
+
"step": 5888
|
41226 |
+
},
|
41227 |
+
{
|
41228 |
+
"epoch": 0.23811497366745177,
|
41229 |
+
"grad_norm": 2.9568212032318115,
|
41230 |
+
"learning_rate": 8.697848580625902e-05,
|
41231 |
+
"loss": 0.1943,
|
41232 |
+
"step": 5889
|
41233 |
+
},
|
41234 |
+
{
|
41235 |
+
"epoch": 0.23815540752271877,
|
41236 |
+
"grad_norm": 4.943511486053467,
|
41237 |
+
"learning_rate": 8.697419306290756e-05,
|
41238 |
+
"loss": 0.1432,
|
41239 |
+
"step": 5890
|
41240 |
+
},
|
41241 |
+
{
|
41242 |
+
"epoch": 0.2381958413779858,
|
41243 |
+
"grad_norm": 3.9663479328155518,
|
41244 |
+
"learning_rate": 8.696989971805784e-05,
|
41245 |
+
"loss": 0.2461,
|
41246 |
+
"step": 5891
|
41247 |
+
},
|
41248 |
+
{
|
41249 |
+
"epoch": 0.2382362752332528,
|
41250 |
+
"grad_norm": 3.9418280124664307,
|
41251 |
+
"learning_rate": 8.696560577177975e-05,
|
41252 |
+
"loss": 0.1536,
|
41253 |
+
"step": 5892
|
41254 |
+
},
|
41255 |
+
{
|
41256 |
+
"epoch": 0.2382767090885198,
|
41257 |
+
"grad_norm": 5.7964277267456055,
|
41258 |
+
"learning_rate": 8.69613112241431e-05,
|
41259 |
+
"loss": 0.155,
|
41260 |
+
"step": 5893
|
41261 |
+
},
|
41262 |
+
{
|
41263 |
+
"epoch": 0.23831714294378684,
|
41264 |
+
"grad_norm": 10.561478614807129,
|
41265 |
+
"learning_rate": 8.695701607521778e-05,
|
41266 |
+
"loss": 0.2146,
|
41267 |
+
"step": 5894
|
41268 |
+
},
|
41269 |
+
{
|
41270 |
+
"epoch": 0.23835757679905384,
|
41271 |
+
"grad_norm": 3.3532555103302,
|
41272 |
+
"learning_rate": 8.695272032507364e-05,
|
41273 |
+
"loss": 0.1057,
|
41274 |
+
"step": 5895
|
41275 |
+
},
|
41276 |
+
{
|
41277 |
+
"epoch": 0.23839801065432087,
|
41278 |
+
"grad_norm": 4.04283332824707,
|
41279 |
+
"learning_rate": 8.69484239737806e-05,
|
41280 |
+
"loss": 0.0644,
|
41281 |
+
"step": 5896
|
41282 |
+
},
|
41283 |
+
{
|
41284 |
+
"epoch": 0.23843844450958787,
|
41285 |
+
"grad_norm": 2.1637003421783447,
|
41286 |
+
"learning_rate": 8.694412702140853e-05,
|
41287 |
+
"loss": 0.0403,
|
41288 |
+
"step": 5897
|
41289 |
+
},
|
41290 |
+
{
|
41291 |
+
"epoch": 0.2384788783648549,
|
41292 |
+
"grad_norm": 5.118740558624268,
|
41293 |
+
"learning_rate": 8.693982946802732e-05,
|
41294 |
+
"loss": 0.1471,
|
41295 |
+
"step": 5898
|
41296 |
+
},
|
41297 |
+
{
|
41298 |
+
"epoch": 0.2385193122201219,
|
41299 |
+
"grad_norm": 3.4040768146514893,
|
41300 |
+
"learning_rate": 8.69355313137069e-05,
|
41301 |
+
"loss": 0.2177,
|
41302 |
+
"step": 5899
|
41303 |
+
},
|
41304 |
+
{
|
41305 |
+
"epoch": 0.23855974607538893,
|
41306 |
+
"grad_norm": 7.387509346008301,
|
41307 |
+
"learning_rate": 8.69312325585172e-05,
|
41308 |
+
"loss": 0.2381,
|
41309 |
+
"step": 5900
|
41310 |
+
},
|
41311 |
+
{
|
41312 |
+
"epoch": 0.23860017993065594,
|
41313 |
+
"grad_norm": 5.387016296386719,
|
41314 |
+
"learning_rate": 8.692693320252817e-05,
|
41315 |
+
"loss": 0.1852,
|
41316 |
+
"step": 5901
|
41317 |
+
},
|
41318 |
+
{
|
41319 |
+
"epoch": 0.23864061378592297,
|
41320 |
+
"grad_norm": 2.965902090072632,
|
41321 |
+
"learning_rate": 8.692263324580969e-05,
|
41322 |
+
"loss": 0.094,
|
41323 |
+
"step": 5902
|
41324 |
+
},
|
41325 |
+
{
|
41326 |
+
"epoch": 0.23868104764118997,
|
41327 |
+
"grad_norm": 3.6524593830108643,
|
41328 |
+
"learning_rate": 8.691833268843176e-05,
|
41329 |
+
"loss": 0.2512,
|
41330 |
+
"step": 5903
|
41331 |
+
},
|
41332 |
+
{
|
41333 |
+
"epoch": 0.23872148149645697,
|
41334 |
+
"grad_norm": 5.109528541564941,
|
41335 |
+
"learning_rate": 8.691403153046433e-05,
|
41336 |
+
"loss": 0.2786,
|
41337 |
+
"step": 5904
|
41338 |
+
},
|
41339 |
+
{
|
41340 |
+
"epoch": 0.238761915351724,
|
41341 |
+
"grad_norm": 5.959028720855713,
|
41342 |
+
"learning_rate": 8.690972977197736e-05,
|
41343 |
+
"loss": 0.1838,
|
41344 |
+
"step": 5905
|
41345 |
+
},
|
41346 |
+
{
|
41347 |
+
"epoch": 0.238802349206991,
|
41348 |
+
"grad_norm": 7.3199334144592285,
|
41349 |
+
"learning_rate": 8.690542741304087e-05,
|
41350 |
+
"loss": 0.2338,
|
41351 |
+
"step": 5906
|
41352 |
+
},
|
41353 |
+
{
|
41354 |
+
"epoch": 0.23884278306225804,
|
41355 |
+
"grad_norm": 5.897810935974121,
|
41356 |
+
"learning_rate": 8.690112445372479e-05,
|
41357 |
+
"loss": 0.1687,
|
41358 |
+
"step": 5907
|
41359 |
+
},
|
41360 |
+
{
|
41361 |
+
"epoch": 0.23888321691752504,
|
41362 |
+
"grad_norm": 5.121602535247803,
|
41363 |
+
"learning_rate": 8.689682089409917e-05,
|
41364 |
+
"loss": 0.1559,
|
41365 |
+
"step": 5908
|
41366 |
+
},
|
41367 |
+
{
|
41368 |
+
"epoch": 0.23892365077279207,
|
41369 |
+
"grad_norm": 3.4499974250793457,
|
41370 |
+
"learning_rate": 8.689251673423401e-05,
|
41371 |
+
"loss": 0.2596,
|
41372 |
+
"step": 5909
|
41373 |
+
},
|
41374 |
+
{
|
41375 |
+
"epoch": 0.23896408462805907,
|
41376 |
+
"grad_norm": 4.022715091705322,
|
41377 |
+
"learning_rate": 8.688821197419931e-05,
|
41378 |
+
"loss": 0.1747,
|
41379 |
+
"step": 5910
|
41380 |
+
},
|
41381 |
+
{
|
41382 |
+
"epoch": 0.2390045184833261,
|
41383 |
+
"grad_norm": 2.1420822143554688,
|
41384 |
+
"learning_rate": 8.688390661406514e-05,
|
41385 |
+
"loss": 0.1691,
|
41386 |
+
"step": 5911
|
41387 |
+
},
|
41388 |
+
{
|
41389 |
+
"epoch": 0.2390449523385931,
|
41390 |
+
"grad_norm": 3.609046459197998,
|
41391 |
+
"learning_rate": 8.687960065390149e-05,
|
41392 |
+
"loss": 0.1622,
|
41393 |
+
"step": 5912
|
41394 |
+
},
|
41395 |
+
{
|
41396 |
+
"epoch": 0.2390853861938601,
|
41397 |
+
"grad_norm": 2.8119325637817383,
|
41398 |
+
"learning_rate": 8.687529409377845e-05,
|
41399 |
+
"loss": 0.1076,
|
41400 |
+
"step": 5913
|
41401 |
+
},
|
41402 |
+
{
|
41403 |
+
"epoch": 0.23912582004912714,
|
41404 |
+
"grad_norm": 2.6061933040618896,
|
41405 |
+
"learning_rate": 8.687098693376605e-05,
|
41406 |
+
"loss": 0.1958,
|
41407 |
+
"step": 5914
|
41408 |
+
},
|
41409 |
+
{
|
41410 |
+
"epoch": 0.23916625390439414,
|
41411 |
+
"grad_norm": 2.707136392593384,
|
41412 |
+
"learning_rate": 8.686667917393438e-05,
|
41413 |
+
"loss": 0.1092,
|
41414 |
+
"step": 5915
|
41415 |
+
},
|
41416 |
+
{
|
41417 |
+
"epoch": 0.23920668775966117,
|
41418 |
+
"grad_norm": 1.820064663887024,
|
41419 |
+
"learning_rate": 8.686237081435352e-05,
|
41420 |
+
"loss": 0.104,
|
41421 |
+
"step": 5916
|
41422 |
+
},
|
41423 |
+
{
|
41424 |
+
"epoch": 0.23924712161492817,
|
41425 |
+
"grad_norm": 3.2410926818847656,
|
41426 |
+
"learning_rate": 8.685806185509356e-05,
|
41427 |
+
"loss": 0.1474,
|
41428 |
+
"step": 5917
|
41429 |
+
},
|
41430 |
+
{
|
41431 |
+
"epoch": 0.2392875554701952,
|
41432 |
+
"grad_norm": 6.133182048797607,
|
41433 |
+
"learning_rate": 8.685375229622456e-05,
|
41434 |
+
"loss": 0.1022,
|
41435 |
+
"step": 5918
|
41436 |
+
},
|
41437 |
+
{
|
41438 |
+
"epoch": 0.2393279893254622,
|
41439 |
+
"grad_norm": 3.047478437423706,
|
41440 |
+
"learning_rate": 8.684944213781667e-05,
|
41441 |
+
"loss": 0.1127,
|
41442 |
+
"step": 5919
|
41443 |
+
},
|
41444 |
+
{
|
41445 |
+
"epoch": 0.23936842318072923,
|
41446 |
+
"grad_norm": 4.528893947601318,
|
41447 |
+
"learning_rate": 8.684513137994e-05,
|
41448 |
+
"loss": 0.2314,
|
41449 |
+
"step": 5920
|
41450 |
+
},
|
41451 |
+
{
|
41452 |
+
"epoch": 0.23940885703599624,
|
41453 |
+
"grad_norm": 7.739344120025635,
|
41454 |
+
"learning_rate": 8.684082002266466e-05,
|
41455 |
+
"loss": 0.3065,
|
41456 |
+
"step": 5921
|
41457 |
+
},
|
41458 |
+
{
|
41459 |
+
"epoch": 0.23944929089126327,
|
41460 |
+
"grad_norm": 1.0010637044906616,
|
41461 |
+
"learning_rate": 8.68365080660608e-05,
|
41462 |
+
"loss": 0.0412,
|
41463 |
+
"step": 5922
|
41464 |
+
},
|
41465 |
+
{
|
41466 |
+
"epoch": 0.23948972474653027,
|
41467 |
+
"grad_norm": 2.762718439102173,
|
41468 |
+
"learning_rate": 8.683219551019859e-05,
|
41469 |
+
"loss": 0.1276,
|
41470 |
+
"step": 5923
|
41471 |
+
},
|
41472 |
+
{
|
41473 |
+
"epoch": 0.23953015860179727,
|
41474 |
+
"grad_norm": 8.688218116760254,
|
41475 |
+
"learning_rate": 8.682788235514813e-05,
|
41476 |
+
"loss": 0.194,
|
41477 |
+
"step": 5924
|
41478 |
+
},
|
41479 |
+
{
|
41480 |
+
"epoch": 0.2395705924570643,
|
41481 |
+
"grad_norm": 3.121171712875366,
|
41482 |
+
"learning_rate": 8.682356860097965e-05,
|
41483 |
+
"loss": 0.2172,
|
41484 |
+
"step": 5925
|
41485 |
+
},
|
41486 |
+
{
|
41487 |
+
"epoch": 0.2396110263123313,
|
41488 |
+
"grad_norm": 3.0537304878234863,
|
41489 |
+
"learning_rate": 8.681925424776329e-05,
|
41490 |
+
"loss": 0.1375,
|
41491 |
+
"step": 5926
|
41492 |
+
},
|
41493 |
+
{
|
41494 |
+
"epoch": 0.23965146016759833,
|
41495 |
+
"grad_norm": 2.694565773010254,
|
41496 |
+
"learning_rate": 8.681493929556923e-05,
|
41497 |
+
"loss": 0.119,
|
41498 |
+
"step": 5927
|
41499 |
+
},
|
41500 |
+
{
|
41501 |
+
"epoch": 0.23969189402286534,
|
41502 |
+
"grad_norm": 1.5173289775848389,
|
41503 |
+
"learning_rate": 8.681062374446769e-05,
|
41504 |
+
"loss": 0.0391,
|
41505 |
+
"step": 5928
|
41506 |
+
},
|
41507 |
+
{
|
41508 |
+
"epoch": 0.23973232787813237,
|
41509 |
+
"grad_norm": 5.209552764892578,
|
41510 |
+
"learning_rate": 8.680630759452888e-05,
|
41511 |
+
"loss": 0.2003,
|
41512 |
+
"step": 5929
|
41513 |
+
},
|
41514 |
+
{
|
41515 |
+
"epoch": 0.23977276173339937,
|
41516 |
+
"grad_norm": 5.338234901428223,
|
41517 |
+
"learning_rate": 8.680199084582296e-05,
|
41518 |
+
"loss": 0.2738,
|
41519 |
+
"step": 5930
|
41520 |
+
},
|
41521 |
+
{
|
41522 |
+
"epoch": 0.2398131955886664,
|
41523 |
+
"grad_norm": 3.0946736335754395,
|
41524 |
+
"learning_rate": 8.679767349842023e-05,
|
41525 |
+
"loss": 0.1803,
|
41526 |
+
"step": 5931
|
41527 |
+
},
|
41528 |
+
{
|
41529 |
+
"epoch": 0.2398536294439334,
|
41530 |
+
"grad_norm": 1.4720056056976318,
|
41531 |
+
"learning_rate": 8.679335555239088e-05,
|
41532 |
+
"loss": 0.0486,
|
41533 |
+
"step": 5932
|
41534 |
+
},
|
41535 |
+
{
|
41536 |
+
"epoch": 0.23989406329920043,
|
41537 |
+
"grad_norm": 2.2390949726104736,
|
41538 |
+
"learning_rate": 8.678903700780516e-05,
|
41539 |
+
"loss": 0.0927,
|
41540 |
+
"step": 5933
|
41541 |
+
},
|
41542 |
+
{
|
41543 |
+
"epoch": 0.23993449715446744,
|
41544 |
+
"grad_norm": 2.8669111728668213,
|
41545 |
+
"learning_rate": 8.678471786473334e-05,
|
41546 |
+
"loss": 0.1533,
|
41547 |
+
"step": 5934
|
41548 |
+
},
|
41549 |
+
{
|
41550 |
+
"epoch": 0.23997493100973444,
|
41551 |
+
"grad_norm": 3.3295137882232666,
|
41552 |
+
"learning_rate": 8.678039812324567e-05,
|
41553 |
+
"loss": 0.0797,
|
41554 |
+
"step": 5935
|
41555 |
+
},
|
41556 |
+
{
|
41557 |
+
"epoch": 0.24001536486500147,
|
41558 |
+
"grad_norm": 5.791916847229004,
|
41559 |
+
"learning_rate": 8.677607778341241e-05,
|
41560 |
+
"loss": 0.173,
|
41561 |
+
"step": 5936
|
41562 |
+
},
|
41563 |
+
{
|
41564 |
+
"epoch": 0.24005579872026847,
|
41565 |
+
"grad_norm": 5.18472146987915,
|
41566 |
+
"learning_rate": 8.677175684530389e-05,
|
41567 |
+
"loss": 0.123,
|
41568 |
+
"step": 5937
|
41569 |
+
},
|
41570 |
+
{
|
41571 |
+
"epoch": 0.2400962325755355,
|
41572 |
+
"grad_norm": 4.101205825805664,
|
41573 |
+
"learning_rate": 8.676743530899036e-05,
|
41574 |
+
"loss": 0.1604,
|
41575 |
+
"step": 5938
|
41576 |
+
},
|
41577 |
+
{
|
41578 |
+
"epoch": 0.2401366664308025,
|
41579 |
+
"grad_norm": 6.613603591918945,
|
41580 |
+
"learning_rate": 8.676311317454213e-05,
|
41581 |
+
"loss": 0.3285,
|
41582 |
+
"step": 5939
|
41583 |
+
},
|
41584 |
+
{
|
41585 |
+
"epoch": 0.24017710028606953,
|
41586 |
+
"grad_norm": 3.877138137817383,
|
41587 |
+
"learning_rate": 8.675879044202952e-05,
|
41588 |
+
"loss": 0.1483,
|
41589 |
+
"step": 5940
|
41590 |
+
},
|
41591 |
+
{
|
41592 |
+
"epoch": 0.24021753414133654,
|
41593 |
+
"grad_norm": 4.349688529968262,
|
41594 |
+
"learning_rate": 8.675446711152286e-05,
|
41595 |
+
"loss": 0.1818,
|
41596 |
+
"step": 5941
|
41597 |
+
},
|
41598 |
+
{
|
41599 |
+
"epoch": 0.24025796799660357,
|
41600 |
+
"grad_norm": 4.138794898986816,
|
41601 |
+
"learning_rate": 8.675014318309247e-05,
|
41602 |
+
"loss": 0.1098,
|
41603 |
+
"step": 5942
|
41604 |
+
},
|
41605 |
+
{
|
41606 |
+
"epoch": 0.24029840185187057,
|
41607 |
+
"grad_norm": 7.146904945373535,
|
41608 |
+
"learning_rate": 8.674581865680868e-05,
|
41609 |
+
"loss": 0.0889,
|
41610 |
+
"step": 5943
|
41611 |
+
},
|
41612 |
+
{
|
41613 |
+
"epoch": 0.2403388357071376,
|
41614 |
+
"grad_norm": 3.275465250015259,
|
41615 |
+
"learning_rate": 8.674149353274188e-05,
|
41616 |
+
"loss": 0.0711,
|
41617 |
+
"step": 5944
|
41618 |
+
},
|
41619 |
+
{
|
41620 |
+
"epoch": 0.2403792695624046,
|
41621 |
+
"grad_norm": 3.784762382507324,
|
41622 |
+
"learning_rate": 8.673716781096241e-05,
|
41623 |
+
"loss": 0.0827,
|
41624 |
+
"step": 5945
|
41625 |
+
},
|
41626 |
+
{
|
41627 |
+
"epoch": 0.2404197034176716,
|
41628 |
+
"grad_norm": 6.69539737701416,
|
41629 |
+
"learning_rate": 8.673284149154062e-05,
|
41630 |
+
"loss": 0.2771,
|
41631 |
+
"step": 5946
|
41632 |
+
},
|
41633 |
+
{
|
41634 |
+
"epoch": 0.24046013727293863,
|
41635 |
+
"grad_norm": 7.916467666625977,
|
41636 |
+
"learning_rate": 8.672851457454694e-05,
|
41637 |
+
"loss": 0.3424,
|
41638 |
+
"step": 5947
|
41639 |
+
},
|
41640 |
+
{
|
41641 |
+
"epoch": 0.24050057112820564,
|
41642 |
+
"grad_norm": 2.8775734901428223,
|
41643 |
+
"learning_rate": 8.67241870600517e-05,
|
41644 |
+
"loss": 0.2087,
|
41645 |
+
"step": 5948
|
41646 |
+
},
|
41647 |
+
{
|
41648 |
+
"epoch": 0.24054100498347267,
|
41649 |
+
"grad_norm": 2.998142719268799,
|
41650 |
+
"learning_rate": 8.671985894812535e-05,
|
41651 |
+
"loss": 0.0888,
|
41652 |
+
"step": 5949
|
41653 |
+
},
|
41654 |
+
{
|
41655 |
+
"epoch": 0.24058143883873967,
|
41656 |
+
"grad_norm": 5.161098957061768,
|
41657 |
+
"learning_rate": 8.671553023883827e-05,
|
41658 |
+
"loss": 0.1359,
|
41659 |
+
"step": 5950
|
41660 |
+
},
|
41661 |
+
{
|
41662 |
+
"epoch": 0.2406218726940067,
|
41663 |
+
"grad_norm": 7.176464080810547,
|
41664 |
+
"learning_rate": 8.67112009322609e-05,
|
41665 |
+
"loss": 0.1972,
|
41666 |
+
"step": 5951
|
41667 |
+
},
|
41668 |
+
{
|
41669 |
+
"epoch": 0.2406623065492737,
|
41670 |
+
"grad_norm": 4.090066909790039,
|
41671 |
+
"learning_rate": 8.670687102846366e-05,
|
41672 |
+
"loss": 0.1149,
|
41673 |
+
"step": 5952
|
41674 |
+
},
|
41675 |
+
{
|
41676 |
+
"epoch": 0.24070274040454073,
|
41677 |
+
"grad_norm": 5.411351680755615,
|
41678 |
+
"learning_rate": 8.670254052751699e-05,
|
41679 |
+
"loss": 0.1941,
|
41680 |
+
"step": 5953
|
41681 |
+
},
|
41682 |
+
{
|
41683 |
+
"epoch": 0.24074317425980774,
|
41684 |
+
"grad_norm": 2.3755807876586914,
|
41685 |
+
"learning_rate": 8.669820942949134e-05,
|
41686 |
+
"loss": 0.2539,
|
41687 |
+
"step": 5954
|
41688 |
+
},
|
41689 |
+
{
|
41690 |
+
"epoch": 0.24078360811507477,
|
41691 |
+
"grad_norm": 6.3790388107299805,
|
41692 |
+
"learning_rate": 8.669387773445716e-05,
|
41693 |
+
"loss": 0.1938,
|
41694 |
+
"step": 5955
|
41695 |
+
},
|
41696 |
+
{
|
41697 |
+
"epoch": 0.24082404197034177,
|
41698 |
+
"grad_norm": 4.425403118133545,
|
41699 |
+
"learning_rate": 8.668954544248493e-05,
|
41700 |
+
"loss": 0.1793,
|
41701 |
+
"step": 5956
|
41702 |
+
},
|
41703 |
+
{
|
41704 |
+
"epoch": 0.24086447582560877,
|
41705 |
+
"grad_norm": 3.932326078414917,
|
41706 |
+
"learning_rate": 8.668521255364514e-05,
|
41707 |
+
"loss": 0.2266,
|
41708 |
+
"step": 5957
|
41709 |
+
},
|
41710 |
+
{
|
41711 |
+
"epoch": 0.2409049096808758,
|
41712 |
+
"grad_norm": 6.011348724365234,
|
41713 |
+
"learning_rate": 8.668087906800824e-05,
|
41714 |
+
"loss": 0.3062,
|
41715 |
+
"step": 5958
|
41716 |
+
},
|
41717 |
+
{
|
41718 |
+
"epoch": 0.2409453435361428,
|
41719 |
+
"grad_norm": 4.63346004486084,
|
41720 |
+
"learning_rate": 8.667654498564474e-05,
|
41721 |
+
"loss": 0.1196,
|
41722 |
+
"step": 5959
|
41723 |
+
},
|
41724 |
+
{
|
41725 |
+
"epoch": 0.24098577739140983,
|
41726 |
+
"grad_norm": 2.6123921871185303,
|
41727 |
+
"learning_rate": 8.667221030662518e-05,
|
41728 |
+
"loss": 0.1411,
|
41729 |
+
"step": 5960
|
41730 |
+
},
|
41731 |
+
{
|
41732 |
+
"epoch": 0.24102621124667684,
|
41733 |
+
"grad_norm": 3.8898696899414062,
|
41734 |
+
"learning_rate": 8.666787503102004e-05,
|
41735 |
+
"loss": 0.1585,
|
41736 |
+
"step": 5961
|
41737 |
+
},
|
41738 |
+
{
|
41739 |
+
"epoch": 0.24106664510194387,
|
41740 |
+
"grad_norm": 5.681251049041748,
|
41741 |
+
"learning_rate": 8.666353915889987e-05,
|
41742 |
+
"loss": 0.1883,
|
41743 |
+
"step": 5962
|
41744 |
+
},
|
41745 |
+
{
|
41746 |
+
"epoch": 0.24110707895721087,
|
41747 |
+
"grad_norm": 2.7869033813476562,
|
41748 |
+
"learning_rate": 8.665920269033518e-05,
|
41749 |
+
"loss": 0.1424,
|
41750 |
+
"step": 5963
|
41751 |
+
},
|
41752 |
+
{
|
41753 |
+
"epoch": 0.2411475128124779,
|
41754 |
+
"grad_norm": 4.215097904205322,
|
41755 |
+
"learning_rate": 8.665486562539653e-05,
|
41756 |
+
"loss": 0.1008,
|
41757 |
+
"step": 5964
|
41758 |
+
},
|
41759 |
+
{
|
41760 |
+
"epoch": 0.2411879466677449,
|
41761 |
+
"grad_norm": 12.377710342407227,
|
41762 |
+
"learning_rate": 8.665052796415448e-05,
|
41763 |
+
"loss": 0.338,
|
41764 |
+
"step": 5965
|
41765 |
+
},
|
41766 |
+
{
|
41767 |
+
"epoch": 0.24122838052301193,
|
41768 |
+
"grad_norm": 5.801630020141602,
|
41769 |
+
"learning_rate": 8.664618970667959e-05,
|
41770 |
+
"loss": 0.1894,
|
41771 |
+
"step": 5966
|
41772 |
+
},
|
41773 |
+
{
|
41774 |
+
"epoch": 0.24126881437827893,
|
41775 |
+
"grad_norm": 2.7168922424316406,
|
41776 |
+
"learning_rate": 8.664185085304244e-05,
|
41777 |
+
"loss": 0.0998,
|
41778 |
+
"step": 5967
|
41779 |
+
},
|
41780 |
+
{
|
41781 |
+
"epoch": 0.24130924823354594,
|
41782 |
+
"grad_norm": 2.9877679347991943,
|
41783 |
+
"learning_rate": 8.663751140331362e-05,
|
41784 |
+
"loss": 0.1775,
|
41785 |
+
"step": 5968
|
41786 |
+
},
|
41787 |
+
{
|
41788 |
+
"epoch": 0.24134968208881297,
|
41789 |
+
"grad_norm": 3.547353982925415,
|
41790 |
+
"learning_rate": 8.663317135756371e-05,
|
41791 |
+
"loss": 0.1909,
|
41792 |
+
"step": 5969
|
41793 |
+
},
|
41794 |
+
{
|
41795 |
+
"epoch": 0.24139011594407997,
|
41796 |
+
"grad_norm": 2.1182587146759033,
|
41797 |
+
"learning_rate": 8.662883071586332e-05,
|
41798 |
+
"loss": 0.1777,
|
41799 |
+
"step": 5970
|
41800 |
+
},
|
41801 |
+
{
|
41802 |
+
"epoch": 0.241430549799347,
|
41803 |
+
"grad_norm": 5.092315673828125,
|
41804 |
+
"learning_rate": 8.662448947828305e-05,
|
41805 |
+
"loss": 0.239,
|
41806 |
+
"step": 5971
|
41807 |
+
},
|
41808 |
+
{
|
41809 |
+
"epoch": 0.241470983654614,
|
41810 |
+
"grad_norm": 4.349262714385986,
|
41811 |
+
"learning_rate": 8.662014764489357e-05,
|
41812 |
+
"loss": 0.2238,
|
41813 |
+
"step": 5972
|
41814 |
+
},
|
41815 |
+
{
|
41816 |
+
"epoch": 0.24151141750988103,
|
41817 |
+
"grad_norm": 4.644000053405762,
|
41818 |
+
"learning_rate": 8.661580521576545e-05,
|
41819 |
+
"loss": 0.238,
|
41820 |
+
"step": 5973
|
41821 |
+
},
|
41822 |
+
{
|
41823 |
+
"epoch": 0.24155185136514803,
|
41824 |
+
"grad_norm": 3.5591719150543213,
|
41825 |
+
"learning_rate": 8.661146219096938e-05,
|
41826 |
+
"loss": 0.1155,
|
41827 |
+
"step": 5974
|
41828 |
+
},
|
41829 |
+
{
|
41830 |
+
"epoch": 0.24159228522041507,
|
41831 |
+
"grad_norm": 2.778860092163086,
|
41832 |
+
"learning_rate": 8.6607118570576e-05,
|
41833 |
+
"loss": 0.1316,
|
41834 |
+
"step": 5975
|
41835 |
}
|
41836 |
],
|
41837 |
"logging_steps": 1,
|
|
|
41851 |
"attributes": {}
|
41852 |
}
|
41853 |
},
|
41854 |
+
"total_flos": 3.695588344332288e+17,
|
41855 |
"train_batch_size": 4,
|
41856 |
"trial_name": null,
|
41857 |
"trial_params": null
|