jonathanjordan21 committed
Commit bd1d7ee · 1 Parent(s): 9e8b8c9
Upload folder using huggingface_hub
- data/model.safetensors +1 -1
- data/optimizer.pt +1 -1
- data/rng_state.pth +1 -1
- data/scheduler.pt +1 -1
- data/trainer_state.json +2341 -3
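The commit message "Upload folder using huggingface_hub" appears to be the default message that huggingface_hub writes when a folder is pushed through its upload API. As a hedged sketch of how a commit like this one is typically produced (the repo_id below is a hypothetical placeholder, not taken from this page):

from huggingface_hub import HfApi

api = HfApi()

# Upload a local checkpoint folder as a single commit; when no
# commit_message is given, huggingface_hub uses a default message
# like the one shown in this commit.
api.upload_folder(
    folder_path="data",                  # local folder with the checkpoint files
    path_in_repo="data",                 # keep the same layout in the repo
    repo_id="jonathanjordan21/example",  # hypothetical repo id
    repo_type="model",
)

Because upload_folder batches the whole folder into one commit, all five checkpoint files (weights, optimizer, RNG state, scheduler, trainer state) change together here.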
data/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dd1709db18e90edc24059b64a578b586ae315622e2466a225c5605e3fa28c2d7
 size 576008736
data/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c3e4e0e6b57e3e8d11e9e3c939267d213e6cc8c2683a951981bc53a1252a6976
 size 1152256984
data/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f24c4425e9d6c449b00fd9a1872bee61b2463d9ca33c8e91c1c5a45cf6c44127
 size 14244
data/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a4d72a7225743ea21366860ec06ef85504326088672296a72343c37eced8e566
 size 1064
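All four binary files above are tracked with Git LFS, so each diff touches only the three-line pointer file (version, oid, size) rather than the binary payload itself. As an illustrative sketch (the helper below is hypothetical, not part of any library), the pointer format can be parsed like this, using the new data/model.safetensors pointer from the diff above:

def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its key/value fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# The new pointer for data/model.safetensors, as shown in the diff above.
pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:dd1709db18e90edc24059b64a578b586ae315622e2466a225c5605e3fa28c2d7\n"
    "size 576008736\n"
)

fields = parse_lfs_pointer(pointer)
print(fields["oid"])   # sha256:dd1709db...
print(fields["size"])  # 576008736 bytes (~576 MB)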
data/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.05842054222052586,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 30000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11669,6 +11669,2344 @@
       "learning_rate": 2.9825041073997102e-05,
       "loss": 1.9942,
       "step": 24990
     }
   ],
   "logging_steps": 15,
@@ -11688,7 +14026,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
+    },
+    {
+      "epoch": 0.04869352194080831,
+      "grad_norm": 3.3070085048675537,
+      "learning_rate": 2.9824831386111103e-05,
+      "loss": 1.5052,
+      "step": 25005
+    },
+    {
+      "epoch": 0.04872273221191857,
+      "grad_norm": 2.769148826599121,
+      "learning_rate": 2.9824621573383107e-05,
+      "loss": 1.8361,
+      "step": 25020
+    },
+    {
+      "epoch": 0.04875194248302883,
+      "grad_norm": 2.3901126384735107,
+      "learning_rate": 2.982441163581489e-05,
+      "loss": 1.8346,
+      "step": 25035
+    },
+    {
+      "epoch": 0.04878115275413909,
+      "grad_norm": 3.8897323608398438,
+      "learning_rate": 2.9824201573408218e-05,
+      "loss": 1.8186,
+      "step": 25050
+    },
+    {
+      "epoch": 0.04881036302524936,
+      "grad_norm": 1.947713017463684,
+      "learning_rate": 2.982399138616486e-05,
+      "loss": 1.8812,
+      "step": 25065
+    },
+    {
+      "epoch": 0.04883957329635962,
+      "grad_norm": 1.6136304140090942,
+      "learning_rate": 2.9823781074086582e-05,
+      "loss": 2.0161,
+      "step": 25080
+    },
+    {
+      "epoch": 0.04886878356746988,
+      "grad_norm": 2.6880128383636475,
+      "learning_rate": 2.9823570637175166e-05,
+      "loss": 1.8861,
+      "step": 25095
+    },
+    {
+      "epoch": 0.04889799383858015,
+      "grad_norm": 4.511691570281982,
+      "learning_rate": 2.982336007543237e-05,
+      "loss": 1.8334,
+      "step": 25110
+    },
+    {
+      "epoch": 0.04892720410969041,
+      "grad_norm": 2.540619134902954,
+      "learning_rate": 2.9823149388859975e-05,
+      "loss": 1.8554,
+      "step": 25125
+    },
+    {
+      "epoch": 0.04895641438080067,
+      "grad_norm": 2.650416135787964,
+      "learning_rate": 2.982293857745976e-05,
+      "loss": 2.0556,
+      "step": 25140
+    },
+    {
+      "epoch": 0.04898562465191093,
+      "grad_norm": 4.075965881347656,
+      "learning_rate": 2.9822727641233488e-05,
+      "loss": 1.7369,
+      "step": 25155
+    },
+    {
+      "epoch": 0.0490148349230212,
+      "grad_norm": 4.21481466293335,
+      "learning_rate": 2.9822516580182944e-05,
+      "loss": 1.9624,
+      "step": 25170
+    },
+    {
+      "epoch": 0.04904404519413146,
+      "grad_norm": 4.617081642150879,
+      "learning_rate": 2.98223053943099e-05,
+      "loss": 1.7276,
+      "step": 25185
+    },
+    {
+      "epoch": 0.04907325546524172,
+      "grad_norm": 3.5099408626556396,
+      "learning_rate": 2.9822094083616145e-05,
+      "loss": 1.8783,
+      "step": 25200
+    },
+    {
+      "epoch": 0.049102465736351984,
+      "grad_norm": 4.118253231048584,
+      "learning_rate": 2.9821882648103445e-05,
+      "loss": 1.7848,
+      "step": 25215
+    },
+    {
+      "epoch": 0.04913167600746225,
+      "grad_norm": 3.617659091949463,
+      "learning_rate": 2.982167108777359e-05,
+      "loss": 1.8307,
+      "step": 25230
+    },
+    {
+      "epoch": 0.04916088627857251,
+      "grad_norm": 3.3717000484466553,
+      "learning_rate": 2.9821459402628357e-05,
+      "loss": 1.6414,
+      "step": 25245
+    },
+    {
+      "epoch": 0.049190096549682774,
+      "grad_norm": 3.3341469764709473,
+      "learning_rate": 2.9821247592669526e-05,
+      "loss": 1.9641,
+      "step": 25260
+    },
+    {
+      "epoch": 0.04921930682079304,
+      "grad_norm": 3.8818039894104004,
+      "learning_rate": 2.9821035657898886e-05,
+      "loss": 1.8227,
+      "step": 25275
+    },
+    {
+      "epoch": 0.0492485170919033,
+      "grad_norm": 2.4647955894470215,
+      "learning_rate": 2.9820823598318226e-05,
+      "loss": 1.9441,
+      "step": 25290
+    },
+    {
+      "epoch": 0.049277727363013564,
+      "grad_norm": 5.145657062530518,
+      "learning_rate": 2.9820611413929318e-05,
+      "loss": 1.7515,
+      "step": 25305
+    },
+    {
+      "epoch": 0.049306937634123825,
+      "grad_norm": 2.595554828643799,
+      "learning_rate": 2.9820399104733964e-05,
+      "loss": 1.8185,
+      "step": 25320
+    },
+    {
+      "epoch": 0.04933614790523409,
+      "grad_norm": 3.0854387283325195,
+      "learning_rate": 2.9820186670733944e-05,
+      "loss": 2.0462,
+      "step": 25335
+    },
+    {
+      "epoch": 0.049365358176344354,
+      "grad_norm": 4.086148262023926,
+      "learning_rate": 2.9819974111931045e-05,
+      "loss": 1.923,
+      "step": 25350
+    },
+    {
+      "epoch": 0.049394568447454615,
+      "grad_norm": 2.3999664783477783,
+      "learning_rate": 2.9819761428327057e-05,
+      "loss": 1.9243,
+      "step": 25365
+    },
+    {
+      "epoch": 0.04942377871856488,
+      "grad_norm": 1.8367825746536255,
+      "learning_rate": 2.981954861992378e-05,
+      "loss": 1.9104,
+      "step": 25380
+    },
+    {
+      "epoch": 0.049452988989675144,
+      "grad_norm": 2.3233165740966797,
+      "learning_rate": 2.9819335686722997e-05,
+      "loss": 1.7446,
+      "step": 25395
+    },
+    {
+      "epoch": 0.049482199260785405,
+      "grad_norm": 3.7498881816864014,
+      "learning_rate": 2.98191226287265e-05,
+      "loss": 2.0172,
+      "step": 25410
+    },
+    {
+      "epoch": 0.049511409531895666,
+      "grad_norm": 2.636087656021118,
+      "learning_rate": 2.9818909445936092e-05,
+      "loss": 1.9958,
+      "step": 25425
+    },
+    {
+      "epoch": 0.049540619803005934,
+      "grad_norm": 2.6167049407958984,
+      "learning_rate": 2.9818696138353564e-05,
+      "loss": 1.9746,
+      "step": 25440
+    },
+    {
+      "epoch": 0.049569830074116195,
+      "grad_norm": 2.699380874633789,
+      "learning_rate": 2.9818482705980708e-05,
+      "loss": 2.0158,
+      "step": 25455
+    },
+    {
+      "epoch": 0.049599040345226456,
+      "grad_norm": 2.8639230728149414,
+      "learning_rate": 2.9818269148819326e-05,
+      "loss": 1.8795,
+      "step": 25470
+    },
+    {
+      "epoch": 0.04962825061633672,
+      "grad_norm": 3.6716597080230713,
+      "learning_rate": 2.9818055466871217e-05,
+      "loss": 2.0066,
+      "step": 25485
+    },
+    {
+      "epoch": 0.049657460887446984,
+      "grad_norm": 3.0852763652801514,
+      "learning_rate": 2.981784166013818e-05,
+      "loss": 1.8392,
+      "step": 25500
+    },
+    {
+      "epoch": 0.049686671158557245,
+      "grad_norm": 2.0497000217437744,
+      "learning_rate": 2.981762772862201e-05,
+      "loss": 2.0587,
+      "step": 25515
+    },
+    {
+      "epoch": 0.049715881429667506,
+      "grad_norm": 4.302377700805664,
+      "learning_rate": 2.9817413672324517e-05,
+      "loss": 1.9415,
+      "step": 25530
+    },
+    {
+      "epoch": 0.049745091700777774,
+      "grad_norm": 2.478428840637207,
+      "learning_rate": 2.9817199491247495e-05,
+      "loss": 2.0082,
+      "step": 25545
+    },
+    {
+      "epoch": 0.049774301971888035,
+      "grad_norm": 3.375516891479492,
+      "learning_rate": 2.9816985185392752e-05,
+      "loss": 2.061,
+      "step": 25560
+    },
+    {
+      "epoch": 0.049803512242998296,
+      "grad_norm": 2.3733456134796143,
+      "learning_rate": 2.9816770754762094e-05,
+      "loss": 1.8752,
+      "step": 25575
+    },
+    {
+      "epoch": 0.049832722514108564,
+      "grad_norm": 2.8646862506866455,
+      "learning_rate": 2.9816556199357334e-05,
+      "loss": 1.9048,
+      "step": 25590
+    },
+    {
+      "epoch": 0.049861932785218825,
+      "grad_norm": 3.711494207382202,
+      "learning_rate": 2.981634151918026e-05,
+      "loss": 1.8555,
+      "step": 25605
+    },
+    {
+      "epoch": 0.049891143056329086,
+      "grad_norm": 4.652657985687256,
+      "learning_rate": 2.9816126714232694e-05,
+      "loss": 1.9591,
+      "step": 25620
+    },
+    {
+      "epoch": 0.04992035332743935,
+      "grad_norm": 3.5854547023773193,
+      "learning_rate": 2.981591178451644e-05,
+      "loss": 1.8043,
+      "step": 25635
+    },
+    {
+      "epoch": 0.049949563598549615,
+      "grad_norm": 2.532128095626831,
+      "learning_rate": 2.981569673003331e-05,
+      "loss": 1.7456,
+      "step": 25650
+    },
+    {
+      "epoch": 0.049978773869659876,
+      "grad_norm": 1.8968348503112793,
+      "learning_rate": 2.9815481550785116e-05,
+      "loss": 1.8971,
+      "step": 25665
+    },
+    {
+      "epoch": 0.05000798414077014,
+      "grad_norm": 4.662414073944092,
+      "learning_rate": 2.9815266246773663e-05,
+      "loss": 2.1576,
+      "step": 25680
+    },
+    {
+      "epoch": 0.050037194411880405,
+      "grad_norm": 2.1178083419799805,
+      "learning_rate": 2.9815050818000773e-05,
+      "loss": 1.9055,
+      "step": 25695
+    },
+    {
+      "epoch": 0.050066404682990666,
+      "grad_norm": 3.8748810291290283,
+      "learning_rate": 2.9814835264468254e-05,
+      "loss": 1.8177,
+      "step": 25710
+    },
+    {
+      "epoch": 0.05009561495410093,
+      "grad_norm": 3.424405097961426,
+      "learning_rate": 2.9814619586177926e-05,
+      "loss": 1.8804,
+      "step": 25725
+    },
+    {
+      "epoch": 0.05012482522521119,
+      "grad_norm": 4.104612827301025,
+      "learning_rate": 2.98144037831316e-05,
+      "loss": 1.973,
+      "step": 25740
+    },
+    {
+      "epoch": 0.050154035496321456,
+      "grad_norm": 2.1848320960998535,
+      "learning_rate": 2.98141878553311e-05,
+      "loss": 1.8529,
+      "step": 25755
+    },
+    {
+      "epoch": 0.05018324576743172,
+      "grad_norm": 4.191700458526611,
+      "learning_rate": 2.981397180277824e-05,
+      "loss": 1.8094,
+      "step": 25770
+    },
+    {
+      "epoch": 0.05021245603854198,
+      "grad_norm": 3.3454208374023438,
+      "learning_rate": 2.981375562547484e-05,
+      "loss": 1.9105,
+      "step": 25785
+    },
+    {
+      "epoch": 0.05024166630965224,
+      "grad_norm": 2.848618507385254,
+      "learning_rate": 2.9813539323422717e-05,
+      "loss": 1.9054,
+      "step": 25800
+    },
+    {
+      "epoch": 0.05027087658076251,
+      "grad_norm": 1.7352811098098755,
+      "learning_rate": 2.98133228966237e-05,
+      "loss": 1.91,
+      "step": 25815
+    },
+    {
+      "epoch": 0.05030008685187277,
+      "grad_norm": 2.177297592163086,
+      "learning_rate": 2.9813106345079604e-05,
+      "loss": 1.8276,
+      "step": 25830
+    },
+    {
+      "epoch": 0.05032929712298303,
+      "grad_norm": 2.1822316646575928,
+      "learning_rate": 2.981288966879226e-05,
+      "loss": 1.7961,
+      "step": 25845
+    },
+    {
+      "epoch": 0.050358507394093296,
+      "grad_norm": 2.70729398727417,
+      "learning_rate": 2.9812672867763482e-05,
+      "loss": 1.7497,
+      "step": 25860
+    },
+    {
+      "epoch": 0.05038771766520356,
+      "grad_norm": 2.869450807571411,
+      "learning_rate": 2.981245594199511e-05,
+      "loss": 1.8309,
+      "step": 25875
+    },
+    {
+      "epoch": 0.05041692793631382,
+      "grad_norm": 2.233219623565674,
+      "learning_rate": 2.981223889148896e-05,
+      "loss": 1.8577,
+      "step": 25890
+    },
+    {
+      "epoch": 0.05044613820742408,
+      "grad_norm": 5.402493000030518,
+      "learning_rate": 2.981202171624686e-05,
+      "loss": 1.9151,
+      "step": 25905
+    },
+    {
+      "epoch": 0.05047534847853435,
+      "grad_norm": 4.127412796020508,
+      "learning_rate": 2.9811804416270648e-05,
+      "loss": 1.8644,
+      "step": 25920
+    },
+    {
+      "epoch": 0.05050455874964461,
+      "grad_norm": 2.9654555320739746,
+      "learning_rate": 2.9811586991562145e-05,
+      "loss": 1.99,
+      "step": 25935
+    },
+    {
+      "epoch": 0.05053376902075487,
+      "grad_norm": 2.6316134929656982,
+      "learning_rate": 2.981136944212318e-05,
+      "loss": 1.7434,
+      "step": 25950
+    },
+    {
+      "epoch": 0.05056297929186514,
+      "grad_norm": 2.6688010692596436,
+      "learning_rate": 2.9811151767955597e-05,
+      "loss": 1.8075,
+      "step": 25965
+    },
+    {
+      "epoch": 0.0505921895629754,
+      "grad_norm": 1.7729136943817139,
+      "learning_rate": 2.981093396906122e-05,
+      "loss": 1.7828,
+      "step": 25980
+    },
+    {
+      "epoch": 0.05062139983408566,
+      "grad_norm": 3.017512321472168,
+      "learning_rate": 2.9810716045441884e-05,
+      "loss": 1.9602,
+      "step": 25995
+    },
+    {
+      "epoch": 0.05065061010519592,
+      "grad_norm": 3.954099416732788,
+      "learning_rate": 2.9810497997099427e-05,
+      "loss": 1.9158,
+      "step": 26010
+    },
+    {
+      "epoch": 0.05067982037630619,
+      "grad_norm": 4.384187698364258,
+      "learning_rate": 2.981027982403568e-05,
+      "loss": 2.0172,
+      "step": 26025
+    },
+    {
+      "epoch": 0.05070903064741645,
+      "grad_norm": 2.7591800689697266,
+      "learning_rate": 2.9810061526252488e-05,
+      "loss": 1.8372,
+      "step": 26040
+    },
+    {
+      "epoch": 0.05073824091852671,
+      "grad_norm": 2.6615748405456543,
+      "learning_rate": 2.980984310375168e-05,
+      "loss": 1.8127,
+      "step": 26055
+    },
+    {
+      "epoch": 0.05076745118963698,
+      "grad_norm": 3.1193888187408447,
+      "learning_rate": 2.9809624556535106e-05,
+      "loss": 1.8826,
+      "step": 26070
+    },
+    {
+      "epoch": 0.05079666146074724,
+      "grad_norm": 2.338146448135376,
+      "learning_rate": 2.9809405884604594e-05,
+      "loss": 1.9003,
+      "step": 26085
+    },
+    {
+      "epoch": 0.0508258717318575,
+      "grad_norm": 2.339162588119507,
+      "learning_rate": 2.9809187087961993e-05,
+      "loss": 1.8444,
+      "step": 26100
+    },
+    {
+      "epoch": 0.05085508200296776,
+      "grad_norm": 6.189070224761963,
+      "learning_rate": 2.980896816660915e-05,
+      "loss": 1.8682,
+      "step": 26115
+    },
+    {
+      "epoch": 0.05088429227407803,
+      "grad_norm": 3.7556257247924805,
+      "learning_rate": 2.9808749120547898e-05,
+      "loss": 1.8692,
+      "step": 26130
+    },
+    {
+      "epoch": 0.05091350254518829,
+      "grad_norm": 2.4181950092315674,
+      "learning_rate": 2.980852994978009e-05,
+      "loss": 1.8497,
+      "step": 26145
+    },
+    {
+      "epoch": 0.05094271281629855,
+      "grad_norm": 2.1751251220703125,
+      "learning_rate": 2.9808310654307566e-05,
+      "loss": 1.7236,
+      "step": 26160
+    },
+    {
+      "epoch": 0.05097192308740882,
+      "grad_norm": 2.304203748703003,
+      "learning_rate": 2.9808091234132177e-05,
+      "loss": 1.8394,
+      "step": 26175
+    },
+    {
+      "epoch": 0.05100113335851908,
+      "grad_norm": 2.03320050239563,
+      "learning_rate": 2.980787168925577e-05,
+      "loss": 2.0082,
+      "step": 26190
+    },
+    {
+      "epoch": 0.05103034362962934,
+      "grad_norm": 3.1527299880981445,
+      "learning_rate": 2.9807652019680195e-05,
+      "loss": 1.8783,
+      "step": 26205
+    },
+    {
+      "epoch": 0.0510595539007396,
+      "grad_norm": 3.638120412826538,
+      "learning_rate": 2.9807432225407295e-05,
+      "loss": 1.8912,
+      "step": 26220
+    },
+    {
+      "epoch": 0.05108876417184987,
+      "grad_norm": 1.9177019596099854,
+      "learning_rate": 2.9807212306438927e-05,
+      "loss": 1.7949,
+      "step": 26235
+    },
+    {
+      "epoch": 0.05111797444296013,
+      "grad_norm": 4.763120174407959,
+      "learning_rate": 2.9806992262776945e-05,
+      "loss": 1.9464,
+      "step": 26250
+    },
+    {
+      "epoch": 0.05114718471407039,
+      "grad_norm": 4.4592132568359375,
+      "learning_rate": 2.980677209442319e-05,
+      "loss": 1.834,
+      "step": 26265
+    },
+    {
+      "epoch": 0.05117639498518065,
+      "grad_norm": 2.6289279460906982,
+      "learning_rate": 2.980655180137953e-05,
+      "loss": 1.7644,
+      "step": 26280
+    },
+    {
+      "epoch": 0.05120560525629092,
+      "grad_norm": 2.2794735431671143,
+      "learning_rate": 2.9806331383647816e-05,
+      "loss": 1.9125,
+      "step": 26295
+    },
+    {
+      "epoch": 0.05123481552740118,
+      "grad_norm": 3.5884125232696533,
+      "learning_rate": 2.9806110841229904e-05,
+      "loss": 1.925,
+      "step": 26310
+    },
+    {
+      "epoch": 0.05126402579851144,
+      "grad_norm": 2.8750176429748535,
+      "learning_rate": 2.9805890174127648e-05,
+      "loss": 1.6463,
+      "step": 26325
+    },
+    {
+      "epoch": 0.05129323606962171,
+      "grad_norm": 3.1167027950286865,
+      "learning_rate": 2.980566938234291e-05,
+      "loss": 1.8897,
+      "step": 26340
+    },
+    {
+      "epoch": 0.05132244634073197,
+      "grad_norm": 2.8106181621551514,
+      "learning_rate": 2.9805448465877546e-05,
+      "loss": 1.855,
+      "step": 26355
+    },
+    {
+      "epoch": 0.05135165661184223,
+      "grad_norm": 3.8905303478240967,
+      "learning_rate": 2.980522742473342e-05,
+      "loss": 1.9127,
+      "step": 26370
+    },
+    {
+      "epoch": 0.05138086688295249,
+      "grad_norm": 3.031163215637207,
+      "learning_rate": 2.980500625891239e-05,
+      "loss": 1.9577,
+      "step": 26385
+    },
+    {
+      "epoch": 0.05141007715406276,
+      "grad_norm": 1.991543173789978,
+      "learning_rate": 2.980478496841632e-05,
+      "loss": 1.6416,
+      "step": 26400
+    },
+    {
+      "epoch": 0.05143928742517302,
+      "grad_norm": 3.90432071685791,
+      "learning_rate": 2.9804563553247076e-05,
+      "loss": 1.7318,
+      "step": 26415
+    },
+    {
+      "epoch": 0.05146849769628328,
+      "grad_norm": 3.582280158996582,
+      "learning_rate": 2.980434201340652e-05,
+      "loss": 1.7349,
+      "step": 26430
+    },
+    {
+      "epoch": 0.05149770796739355,
+      "grad_norm": 3.1633496284484863,
+      "learning_rate": 2.980412034889651e-05,
+      "loss": 1.8023,
+      "step": 26445
+    },
+    {
+      "epoch": 0.05152691823850381,
+      "grad_norm": 3.7939155101776123,
+      "learning_rate": 2.9803898559718927e-05,
+      "loss": 1.6626,
+      "step": 26460
+    },
+    {
+      "epoch": 0.05155612850961407,
+      "grad_norm": 3.093492031097412,
+      "learning_rate": 2.9803676645875634e-05,
+      "loss": 1.7914,
+      "step": 26475
+    },
+    {
+      "epoch": 0.051585338780724334,
+      "grad_norm": 2.4469592571258545,
+      "learning_rate": 2.9803454607368493e-05,
+      "loss": 1.7484,
+      "step": 26490
+    },
+    {
+      "epoch": 0.0516145490518346,
+      "grad_norm": 2.613276243209839,
+      "learning_rate": 2.9803232444199382e-05,
+      "loss": 1.7984,
+      "step": 26505
+    },
+    {
+      "epoch": 0.05164375932294486,
+      "grad_norm": 3.1793646812438965,
+      "learning_rate": 2.9803010156370166e-05,
+      "loss": 1.7114,
+      "step": 26520
+    },
+    {
+      "epoch": 0.051672969594055124,
+      "grad_norm": 1.9021662473678589,
+      "learning_rate": 2.980278774388272e-05,
+      "loss": 1.6837,
+      "step": 26535
+    },
+    {
+      "epoch": 0.05170217986516539,
+      "grad_norm": 2.415710687637329,
+      "learning_rate": 2.9802565206738922e-05,
+      "loss": 1.9818,
+      "step": 26550
+    },
+    {
+      "epoch": 0.05173139013627565,
+      "grad_norm": 3.8227622509002686,
+      "learning_rate": 2.9802342544940635e-05,
+      "loss": 1.9691,
+      "step": 26565
+    },
+    {
+      "epoch": 0.051760600407385914,
+      "grad_norm": 1.6945210695266724,
+      "learning_rate": 2.980211975848974e-05,
+      "loss": 1.9788,
+      "step": 26580
+    },
+    {
+      "epoch": 0.051789810678496175,
+      "grad_norm": 2.1171348094940186,
+      "learning_rate": 2.980189684738811e-05,
+      "loss": 1.8498,
+      "step": 26595
+    },
+    {
+      "epoch": 0.05181902094960644,
+      "grad_norm": 3.8129332065582275,
+      "learning_rate": 2.9801673811637628e-05,
+      "loss": 1.8109,
+      "step": 26610
+    },
+    {
+      "epoch": 0.0518482312207167,
+      "grad_norm": 3.1358275413513184,
+      "learning_rate": 2.9801450651240173e-05,
+      "loss": 1.7809,
+      "step": 26625
+    },
+    {
+      "epoch": 0.051877441491826964,
+      "grad_norm": 3.9287755489349365,
+      "learning_rate": 2.9801227366197614e-05,
+      "loss": 1.7791,
+      "step": 26640
+    },
+    {
+      "epoch": 0.05190665176293723,
+      "grad_norm": 3.269742488861084,
+      "learning_rate": 2.980100395651184e-05,
+      "loss": 2.0053,
+      "step": 26655
+    },
+    {
+      "epoch": 0.05193586203404749,
+      "grad_norm": 3.23358154296875,
+      "learning_rate": 2.980078042218473e-05,
+      "loss": 1.7863,
+      "step": 26670
+    },
+    {
+      "epoch": 0.051965072305157754,
+      "grad_norm": 3.6313607692718506,
+      "learning_rate": 2.980055676321817e-05,
+      "loss": 1.8125,
+      "step": 26685
+    },
+    {
+      "epoch": 0.051994282576268015,
+      "grad_norm": 2.303229331970215,
+      "learning_rate": 2.9800332979614035e-05,
+      "loss": 1.8336,
+      "step": 26700
+    },
+    {
+      "epoch": 0.05202349284737828,
+      "grad_norm": 4.923130035400391,
+      "learning_rate": 2.9800109071374216e-05,
+      "loss": 1.9392,
+      "step": 26715
+    },
+    {
+      "epoch": 0.052052703118488544,
+      "grad_norm": 6.160820484161377,
+      "learning_rate": 2.9799885038500597e-05,
+      "loss": 1.8991,
+      "step": 26730
+    },
+    {
+      "epoch": 0.052081913389598805,
+      "grad_norm": 1.9204509258270264,
+      "learning_rate": 2.9799660880995065e-05,
+      "loss": 1.8511,
+      "step": 26745
+    },
+    {
+      "epoch": 0.05211112366070907,
+      "grad_norm": 4.1540656089782715,
+      "learning_rate": 2.9799436598859507e-05,
+      "loss": 1.7353,
+      "step": 26760
+    },
+    {
+      "epoch": 0.052140333931819334,
+      "grad_norm": 2.5721335411071777,
+      "learning_rate": 2.979921219209581e-05,
+      "loss": 1.6949,
+      "step": 26775
+    },
+    {
+      "epoch": 0.052169544202929595,
+      "grad_norm": 2.5524466037750244,
+      "learning_rate": 2.9798987660705867e-05,
+      "loss": 1.9033,
+      "step": 26790
+    },
+    {
+      "epoch": 0.052198754474039856,
+      "grad_norm": 2.8870813846588135,
+      "learning_rate": 2.979876300469157e-05,
+      "loss": 1.6899,
+      "step": 26805
+    },
+    {
+      "epoch": 0.052227964745150124,
+      "grad_norm": 2.5837631225585938,
+      "learning_rate": 2.9798538224054804e-05,
+      "loss": 1.7715,
+      "step": 26820
+    },
+    {
+      "epoch": 0.052257175016260385,
+      "grad_norm": 2.5015158653259277,
+      "learning_rate": 2.979831331879747e-05,
+      "loss": 1.9121,
+      "step": 26835
+    },
+    {
+      "epoch": 0.052286385287370646,
+      "grad_norm": 3.3133704662323,
+      "learning_rate": 2.9798088288921457e-05,
+      "loss": 1.87,
+      "step": 26850
+    },
+    {
+      "epoch": 0.05231559555848091,
+      "grad_norm": 2.159090042114258,
+      "learning_rate": 2.979786313442866e-05,
+      "loss": 1.9665,
+      "step": 26865
+    },
+    {
+      "epoch": 0.052344805829591175,
+      "grad_norm": 3.6745707988739014,
+      "learning_rate": 2.9797637855320977e-05,
+      "loss": 1.8956,
+      "step": 26880
+    },
+    {
+      "epoch": 0.052374016100701436,
+      "grad_norm": 2.593538761138916,
+      "learning_rate": 2.9797412451600305e-05,
+      "loss": 1.9206,
+      "step": 26895
+    },
+    {
+      "epoch": 0.0524032263718117,
+      "grad_norm": 2.1524672508239746,
+      "learning_rate": 2.979718692326854e-05,
+      "loss": 2.0598,
+      "step": 26910
+    },
+    {
+      "epoch": 0.052432436642921965,
+      "grad_norm": 5.403210639953613,
+      "learning_rate": 2.9796961270327583e-05,
+      "loss": 1.9446,
+      "step": 26925
+    },
+    {
+      "epoch": 0.052461646914032226,
+      "grad_norm": 2.859320878982544,
+      "learning_rate": 2.9796735492779338e-05,
+      "loss": 1.6969,
+      "step": 26940
+    },
+    {
+      "epoch": 0.05249085718514249,
+      "grad_norm": 2.0825371742248535,
+      "learning_rate": 2.9796509590625696e-05,
+      "loss": 1.8951,
+      "step": 26955
+    },
+    {
+      "epoch": 0.05252006745625275,
+      "grad_norm": 2.3604981899261475,
+      "learning_rate": 2.979628356386857e-05,
+      "loss": 1.8861,
+      "step": 26970
+    },
+    {
+      "epoch": 0.052549277727363015,
+      "grad_norm": 3.510629415512085,
+      "learning_rate": 2.9796057412509856e-05,
+      "loss": 1.9885,
+      "step": 26985
+    },
+    {
+      "epoch": 0.052578487998473276,
+      "grad_norm": 1.72383713722229,
+      "learning_rate": 2.9795831136551467e-05,
+      "loss": 1.832,
+      "step": 27000
+    },
+    {
+      "epoch": 0.05260769826958354,
+      "grad_norm": 2.767523765563965,
+      "learning_rate": 2.9795604735995297e-05,
+      "loss": 1.8956,
+      "step": 27015
+    },
+    {
+      "epoch": 0.052636908540693805,
+      "grad_norm": 2.731154203414917,
+      "learning_rate": 2.979537821084326e-05,
+      "loss": 1.7157,
+      "step": 27030
+    },
+    {
+      "epoch": 0.052666118811804066,
+      "grad_norm": 3.17053484916687,
+      "learning_rate": 2.9795151561097265e-05,
+      "loss": 1.9691,
+      "step": 27045
+    },
+    {
+      "epoch": 0.05269532908291433,
+      "grad_norm": 3.9647130966186523,
+      "learning_rate": 2.979492478675922e-05,
+      "loss": 1.9736,
+      "step": 27060
+    },
+    {
+      "epoch": 0.05272453935402459,
+      "grad_norm": 2.4074623584747314,
+      "learning_rate": 2.9794697887831027e-05,
+      "loss": 1.8325,
+      "step": 27075
+    },
+    {
+      "epoch": 0.052753749625134856,
+      "grad_norm": 4.785901069641113,
+      "learning_rate": 2.9794470864314603e-05,
+      "loss": 1.9717,
+      "step": 27090
+    },
+    {
+      "epoch": 0.05278295989624512,
+      "grad_norm": 5.4298577308654785,
+      "learning_rate": 2.979424371621186e-05,
+      "loss": 1.8316,
+      "step": 27105
+    },
+    {
+      "epoch": 0.05281217016735538,
+      "grad_norm": 2.509413003921509,
+      "learning_rate": 2.9794016443524713e-05,
+      "loss": 1.8792,
+      "step": 27120
+    },
+    {
+      "epoch": 0.052841380438465646,
+      "grad_norm": 1.903182029724121,
+      "learning_rate": 2.979378904625507e-05,
+      "loss": 1.8049,
+      "step": 27135
+    },
+    {
+      "epoch": 0.05287059070957591,
+      "grad_norm": 3.3434927463531494,
+      "learning_rate": 2.9793561524404846e-05,
+      "loss": 1.7794,
+      "step": 27150
+    },
+    {
+      "epoch": 0.05289980098068617,
+      "grad_norm": 5.064967632293701,
+      "learning_rate": 2.9793333877975964e-05,
+      "loss": 1.8726,
+      "step": 27165
+    },
+    {
+      "epoch": 0.05292901125179643,
+      "grad_norm": 1.8450191020965576,
+      "learning_rate": 2.9793106106970335e-05,
+      "loss": 1.8586,
+      "step": 27180
+    },
+    {
+      "epoch": 0.0529582215229067,
+      "grad_norm": 2.540570020675659,
+      "learning_rate": 2.979287821138988e-05,
+      "loss": 1.8988,
+      "step": 27195
+    },
+    {
+      "epoch": 0.05298743179401696,
+      "grad_norm": 2.0893425941467285,
+      "learning_rate": 2.9792650191236516e-05,
+      "loss": 1.7794,
+      "step": 27210
+    },
+    {
+      "epoch": 0.05301664206512722,
+      "grad_norm": 2.7562851905822754,
+      "learning_rate": 2.979242204651216e-05,
+      "loss": 1.7025,
+      "step": 27225
+    },
+    {
+      "epoch": 0.05304585233623749,
+      "grad_norm": 4.634995937347412,
+      "learning_rate": 2.9792193777218743e-05,
+      "loss": 1.8236,
+      "step": 27240
+    },
+    {
+      "epoch": 0.05307506260734775,
+      "grad_norm": 3.1855075359344482,
+      "learning_rate": 2.9791965383358184e-05,
+      "loss": 1.8439,
+      "step": 27255
+    },
+    {
+      "epoch": 0.05310427287845801,
+      "grad_norm": 3.3286306858062744,
+      "learning_rate": 2.9791736864932403e-05,
+      "loss": 1.9314,
+      "step": 27270
+    },
+    {
+      "epoch": 0.05313348314956827,
+      "grad_norm": 3.1028332710266113,
+      "learning_rate": 2.979150822194332e-05,
+      "loss": 2.0257,
+      "step": 27285
+    },
+    {
+      "epoch": 0.05316269342067854,
+      "grad_norm": 4.1933393478393555,
+      "learning_rate": 2.979127945439287e-05,
+      "loss": 1.9908,
+      "step": 27300
+    },
+    {
+      "epoch": 0.0531919036917888,
+      "grad_norm": 4.206679344177246,
+      "learning_rate": 2.9791050562282974e-05,
+      "loss": 1.8144,
+      "step": 27315
+    },
+    {
+      "epoch": 0.05322111396289906,
+      "grad_norm": 2.231621265411377,
+      "learning_rate": 2.9790821545615562e-05,
+      "loss": 1.824,
+      "step": 27330
+    },
+    {
+      "epoch": 0.05325032423400932,
+      "grad_norm": 3.625483512878418,
+      "learning_rate": 2.9790592404392557e-05,
+      "loss": 2.0087,
+      "step": 27345
+    },
+    {
+      "epoch": 0.05327953450511959,
+      "grad_norm": 4.063029766082764,
+      "learning_rate": 2.9790363138615902e-05,
+      "loss": 1.8927,
+      "step": 27360
+    },
+    {
+      "epoch": 0.05330874477622985,
+      "grad_norm": 4.166107654571533,
+      "learning_rate": 2.979013374828751e-05,
+      "loss": 1.9004,
+      "step": 27375
+    },
+    {
+      "epoch": 0.05333795504734011,
+      "grad_norm": 2.737416982650757,
+      "learning_rate": 2.9789904233409326e-05,
+      "loss": 1.9678,
+      "step": 27390
+    },
+    {
+      "epoch": 0.05336716531845038,
+      "grad_norm": 2.131272315979004,
+      "learning_rate": 2.9789674593983277e-05,
+      "loss": 1.895,
+      "step": 27405
+    },
+    {
+      "epoch": 0.05339637558956064,
+      "grad_norm": 2.983872890472412,
+      "learning_rate": 2.9789444830011302e-05,
+      "loss": 1.7807,
+      "step": 27420
+    },
+    {
+      "epoch": 0.0534255858606709,
+      "grad_norm": 5.785390377044678,
+      "learning_rate": 2.978921494149533e-05,
+      "loss": 1.9559,
+      "step": 27435
+    },
+    {
+      "epoch": 0.05345479613178116,
+      "grad_norm": 2.7100813388824463,
+      "learning_rate": 2.9788984928437298e-05,
+      "loss": 1.9147,
+      "step": 27450
+    },
+    {
+      "epoch": 0.05348400640289143,
+      "grad_norm": 2.0759334564208984,
+      "learning_rate": 2.978875479083914e-05,
+      "loss": 1.7345,
+      "step": 27465
+    },
+    {
+      "epoch": 0.05351321667400169,
+      "grad_norm": 4.645893573760986,
+      "learning_rate": 2.9788524528702804e-05,
+      "loss": 1.7246,
+      "step": 27480
+    },
+    {
+      "epoch": 0.05354242694511195,
+      "grad_norm": 3.1474320888519287,
+      "learning_rate": 2.9788294142030225e-05,
+      "loss": 1.8679,
+      "step": 27495
+    },
+    {
+      "epoch": 0.05357163721622222,
+      "grad_norm": 2.8445558547973633,
+      "learning_rate": 2.9788063630823335e-05,
+      "loss": 1.748,
+      "step": 27510
+    },
+    {
+      "epoch": 0.05360084748733248,
+      "grad_norm": 3.176330089569092,
+      "learning_rate": 2.978783299508408e-05,
+      "loss": 1.9698,
+      "step": 27525
+    },
+    {
+      "epoch": 0.05363005775844274,
+      "grad_norm": 2.875300884246826,
+      "learning_rate": 2.9787602234814407e-05,
+      "loss": 1.7617,
+      "step": 27540
+    },
+    {
+      "epoch": 0.053659268029553,
+      "grad_norm": 4.004800796508789,
+      "learning_rate": 2.978737135001626e-05,
+      "loss": 1.8204,
+      "step": 27555
+    },
+    {
+      "epoch": 0.05368847830066327,
+      "grad_norm": 2.3588531017303467,
+      "learning_rate": 2.9787140340691574e-05,
+      "loss": 1.8028,
+      "step": 27570
+    },
+    {
+      "epoch": 0.05371768857177353,
+      "grad_norm": 2.293210744857788,
+      "learning_rate": 2.9786909206842297e-05,
+      "loss": 1.9525,
+      "step": 27585
+    },
+    {
+      "epoch": 0.05374689884288379,
+      "grad_norm": 3.4703333377838135,
+      "learning_rate": 2.9786677948470382e-05,
+      "loss": 1.8144,
+      "step": 27600
+    },
+    {
+      "epoch": 0.05377610911399406,
+      "grad_norm": 2.1387853622436523,
+      "learning_rate": 2.9786446565577772e-05,
+      "loss": 1.7853,
+      "step": 27615
+    },
+    {
+      "epoch": 0.05380531938510432,
+      "grad_norm": 1.9415435791015625,
+      "learning_rate": 2.9786215058166417e-05,
+      "loss": 1.8912,
+      "step": 27630
+    },
+    {
+      "epoch": 0.05383452965621458,
+      "grad_norm": 3.315534830093384,
+      "learning_rate": 2.978598342623826e-05,
+      "loss": 1.7991,
+      "step": 27645
+    },
+    {
+      "epoch": 0.05386373992732484,
+      "grad_norm": 3.2139084339141846,
+      "learning_rate": 2.9785751669795265e-05,
+      "loss": 1.8284,
+      "step": 27660
+    },
+    {
+      "epoch": 0.05389295019843511,
+      "grad_norm": 2.0407660007476807,
+      "learning_rate": 2.9785519788839368e-05,
+      "loss": 1.8263,
+      "step": 27675
+    },
+    {
+      "epoch": 0.05392216046954537,
+      "grad_norm": 3.6719486713409424,
+      "learning_rate": 2.9785287783372538e-05,
+      "loss": 1.7552,
+      "step": 27690
+    },
+    {
+      "epoch": 0.05395137074065563,
+      "grad_norm": 2.3214800357818604,
+      "learning_rate": 2.978505565339671e-05,
+      "loss": 1.7752,
+      "step": 27705
+    },
+    {
+      "epoch": 0.0539805810117659,
+      "grad_norm": 3.5290887355804443,
+      "learning_rate": 2.9784823398913856e-05,
+      "loss": 1.6684,
+      "step": 27720
+    },
+    {
+      "epoch": 0.05400979128287616,
+      "grad_norm": 2.344564437866211,
+      "learning_rate": 2.978459101992592e-05,
+      "loss": 1.7649,
+      "step": 27735
+    },
+    {
+      "epoch": 0.05403900155398642,
+      "grad_norm": 3.5880258083343506,
+      "learning_rate": 2.9784358516434867e-05,
+      "loss": 1.8194,
+      "step": 27750
+    },
+    {
+      "epoch": 0.05406821182509668,
+      "grad_norm": 4.46945858001709,
+      "learning_rate": 2.978412588844265e-05,
+      "loss": 1.9552,
+      "step": 27765
+    },
+    {
+      "epoch": 0.05409742209620695,
+      "grad_norm": 2.9063570499420166,
+      "learning_rate": 2.978389313595123e-05,
+      "loss": 1.9078,
+      "step": 27780
+    },
+    {
+      "epoch": 0.05412663236731721,
+      "grad_norm": 4.740022659301758,
+      "learning_rate": 2.9783660258962568e-05,
+      "loss": 1.8773,
+      "step": 27795
+    },
+    {
+      "epoch": 0.05415584263842747,
+      "grad_norm": 5.9590654373168945,
+      "learning_rate": 2.9783427257478623e-05,
+      "loss": 2.0891,
+      "step": 27810
+    },
+    {
+      "epoch": 0.05418505290953774,
+      "grad_norm": 3.4905142784118652,
+      "learning_rate": 2.978319413150136e-05,
+      "loss": 1.912,
+      "step": 27825
+    },
+    {
+      "epoch": 0.054214263180648,
+      "grad_norm": 4.312283039093018,
+      "learning_rate": 2.978296088103273e-05,
+      "loss": 1.7969,
+      "step": 27840
+    },
+    {
+      "epoch": 0.05424347345175826,
+      "grad_norm": 3.419679880142212,
+      "learning_rate": 2.978272750607472e-05,
+      "loss": 2.0149,
+      "step": 27855
+    },
+    {
+      "epoch": 0.054272683722868524,
+      "grad_norm": 1.4746593236923218,
+      "learning_rate": 2.9782494006629275e-05,
+      "loss": 1.7991,
+      "step": 27870
+    },
+    {
+      "epoch": 0.05430189399397879,
+      "grad_norm": 2.6810593605041504,
+      "learning_rate": 2.9782260382698374e-05,
+      "loss": 1.8557,
+      "step": 27885
+    },
+    {
+      "epoch": 0.05433110426508905,
+      "grad_norm": 4.144235134124756,
+      "learning_rate": 2.9782026634283975e-05,
+      "loss": 2.0213,
+      "step": 27900
+    },
+    {
+      "epoch": 0.054360314536199314,
+      "grad_norm": 3.5831942558288574,
+      "learning_rate": 2.9781792761388055e-05,
+      "loss": 1.7606,
+      "step": 27915
+    },
+    {
+      "epoch": 0.054389524807309575,
+      "grad_norm": 3.201408863067627,
+      "learning_rate": 2.9781558764012573e-05,
+      "loss": 1.7619,
+      "step": 27930
+    },
+    {
+      "epoch": 0.05441873507841984,
+      "grad_norm": 2.8114073276519775,
+      "learning_rate": 2.978132464215951e-05,
+      "loss": 1.9014,
+      "step": 27945
+    },
+    {
+      "epoch": 0.054447945349530104,
+      "grad_norm": 2.400465726852417,
+      "learning_rate": 2.9781090395830834e-05,
+      "loss": 1.9041,
+      "step": 27960
+    },
+    {
+      "epoch": 0.054477155620640365,
+      "grad_norm": 2.1496269702911377,
+      "learning_rate": 2.9780856025028513e-05,
+      "loss": 1.7252,
+      "step": 27975
+    },
+    {
+      "epoch": 0.05450636589175063,
+      "grad_norm": 3.477867603302002,
+      "learning_rate": 2.978062152975453e-05,
+      "loss": 1.7466,
+      "step": 27990
+    },
+    {
+      "epoch": 0.054535576162860894,
+      "grad_norm": 2.1709723472595215,
+      "learning_rate": 2.978038691001085e-05,
+      "loss": 1.6808,
+      "step": 28005
+    },
+    {
+      "epoch": 0.054564786433971155,
+      "grad_norm": 2.13655686378479,
+      "learning_rate": 2.978015216579945e-05,
+      "loss": 1.8089,
+      "step": 28020
+    },
+    {
+      "epoch": 0.054593996705081416,
+      "grad_norm": 3.563598394393921,
+      "learning_rate": 2.9779917297122318e-05,
+      "loss": 1.8368,
+      "step": 28035
+    },
+    {
+      "epoch": 0.054623206976191684,
+      "grad_norm": 3.1836514472961426,
+      "learning_rate": 2.977968230398142e-05,
+      "loss": 1.8042,
+      "step": 28050
+    },
+    {
+      "epoch": 0.054652417247301945,
+      "grad_norm": 2.4931259155273438,
+      "learning_rate": 2.9779447186378738e-05,
+      "loss": 1.8061,
+      "step": 28065
+    },
+    {
+      "epoch": 0.054681627518412206,
+      "grad_norm": 3.2399775981903076,
+      "learning_rate": 2.977921194431625e-05,
+      "loss": 1.8723,
+      "step": 28080
+    },
+    {
+      "epoch": 0.05471083778952247,
+      "grad_norm": 3.281590461730957,
+      "learning_rate": 2.977897657779594e-05,
+      "loss": 1.8882,
+      "step": 28095
+    },
+    {
+      "epoch": 0.054740048060632734,
+      "grad_norm": 2.607039451599121,
+      "learning_rate": 2.9778741086819795e-05,
+      "loss": 1.8796,
+      "step": 28110
+    },
+    {
+      "epoch": 0.054769258331742995,
+      "grad_norm": 3.4543254375457764,
+      "learning_rate": 2.977850547138979e-05,
+      "loss": 1.8826,
+      "step": 28125
+    },
+    {
+      "epoch": 0.054798468602853256,
+      "grad_norm": 3.8168885707855225,
+      "learning_rate": 2.9778269731507914e-05,
+      "loss": 1.9827,
+      "step": 28140
+    },
+    {
+      "epoch": 0.054827678873963524,
+      "grad_norm": 2.7308406829833984,
+      "learning_rate": 2.977803386717615e-05,
+      "loss": 1.768,
+      "step": 28155
+    },
+    {
+      "epoch": 0.054856889145073785,
+      "grad_norm": 4.276648998260498,
+      "learning_rate": 2.9777797878396477e-05,
+      "loss": 1.8696,
+      "step": 28170
+    },
+    {
+      "epoch": 0.054886099416184046,
+      "grad_norm": 3.1874961853027344,
+      "learning_rate": 2.97775617651709e-05,
+      "loss": 1.881,
+      "step": 28185
+    },
+    {
+      "epoch": 0.054915309687294314,
+      "grad_norm": 2.4808239936828613,
+      "learning_rate": 2.977732552750139e-05,
+      "loss": 1.7825,
+      "step": 28200
+    },
+    {
+      "epoch": 0.054944519958404575,
+      "grad_norm": 3.0970373153686523,
+      "learning_rate": 2.9777089165389942e-05,
+      "loss": 1.7825,
+      "step": 28215
+    },
+    {
+      "epoch": 0.054973730229514836,
+      "grad_norm": 1.757534384727478,
+      "learning_rate": 2.9776852678838555e-05,
+      "loss": 2.0145,
+      "step": 28230
+    },
+    {
+      "epoch": 0.0550029405006251,
+      "grad_norm": 2.351555347442627,
+      "learning_rate": 2.977661606784921e-05,
+      "loss": 2.0268,
+      "step": 28245
+    },
+    {
+      "epoch": 0.055032150771735365,
+      "grad_norm": 2.4561007022857666,
+      "learning_rate": 2.9776379332423902e-05,
+      "loss": 1.7745,
+      "step": 28260
+    },
+    {
+      "epoch": 0.055061361042845626,
+      "grad_norm": 3.8144009113311768,
+      "learning_rate": 2.9776142472564624e-05,
|
13203 |
+
"loss": 1.8408,
|
13204 |
+
"step": 28275
|
13205 |
+
},
|
13206 |
+
{
|
13207 |
+
"epoch": 0.05509057131395589,
|
13208 |
+
"grad_norm": 4.969006061553955,
|
13209 |
+
"learning_rate": 2.9775905488273373e-05,
|
13210 |
+
"loss": 1.9223,
|
13211 |
+
"step": 28290
|
13212 |
+
},
|
13213 |
+
{
|
13214 |
+
"epoch": 0.055119781585066155,
|
13215 |
+
"grad_norm": 5.71866512298584,
|
13216 |
+
"learning_rate": 2.9775668379552146e-05,
|
13217 |
+
"loss": 1.842,
|
13218 |
+
"step": 28305
|
13219 |
+
},
|
13220 |
+
{
|
13221 |
+
"epoch": 0.055148991856176416,
|
13222 |
+
"grad_norm": 1.9934380054473877,
|
13223 |
+
"learning_rate": 2.9775431146402937e-05,
|
13224 |
+
"loss": 1.976,
|
13225 |
+
"step": 28320
|
13226 |
+
},
|
13227 |
+
{
|
13228 |
+
"epoch": 0.05517820212728668,
|
13229 |
+
"grad_norm": 1.7895939350128174,
|
13230 |
+
"learning_rate": 2.9775193788827743e-05,
|
13231 |
+
"loss": 2.0921,
|
13232 |
+
"step": 28335
|
13233 |
+
},
|
13234 |
+
{
|
13235 |
+
"epoch": 0.05520741239839694,
|
13236 |
+
"grad_norm": 4.20900821685791,
|
13237 |
+
"learning_rate": 2.9774956306828566e-05,
|
13238 |
+
"loss": 1.9333,
|
13239 |
+
"step": 28350
|
13240 |
+
},
|
13241 |
+
{
|
13242 |
+
"epoch": 0.055236622669507206,
|
13243 |
+
"grad_norm": 1.7822163105010986,
|
13244 |
+
"learning_rate": 2.97747187004074e-05,
|
13245 |
+
"loss": 2.0872,
|
13246 |
+
"step": 28365
|
13247 |
+
},
|
13248 |
+
{
|
13249 |
+
"epoch": 0.05526583294061747,
|
13250 |
+
"grad_norm": 1.7469080686569214,
|
13251 |
+
"learning_rate": 2.9774480969566254e-05,
|
13252 |
+
"loss": 1.8781,
|
13253 |
+
"step": 28380
|
13254 |
+
},
|
13255 |
+
{
|
13256 |
+
"epoch": 0.05529504321172773,
|
13257 |
+
"grad_norm": 3.0533454418182373,
|
13258 |
+
"learning_rate": 2.977424311430712e-05,
|
13259 |
+
"loss": 1.7184,
|
13260 |
+
"step": 28395
|
13261 |
+
},
|
13262 |
+
{
|
13263 |
+
"epoch": 0.05532425348283799,
|
13264 |
+
"grad_norm": 2.729780912399292,
|
13265 |
+
"learning_rate": 2.977400513463201e-05,
|
13266 |
+
"loss": 1.7794,
|
13267 |
+
"step": 28410
|
13268 |
+
},
|
13269 |
+
{
|
13270 |
+
"epoch": 0.05535346375394826,
|
13271 |
+
"grad_norm": 3.2360620498657227,
|
13272 |
+
"learning_rate": 2.9773767030542926e-05,
|
13273 |
+
"loss": 1.7711,
|
13274 |
+
"step": 28425
|
13275 |
+
},
|
13276 |
+
{
|
13277 |
+
"epoch": 0.05538267402505852,
|
13278 |
+
"grad_norm": 2.3599355220794678,
|
13279 |
+
"learning_rate": 2.9773528802041873e-05,
|
13280 |
+
"loss": 1.8758,
|
13281 |
+
"step": 28440
|
13282 |
+
},
|
13283 |
+
{
|
13284 |
+
"epoch": 0.05541188429616878,
|
13285 |
+
"grad_norm": 3.3827106952667236,
|
13286 |
+
"learning_rate": 2.9773290449130856e-05,
|
13287 |
+
"loss": 1.901,
|
13288 |
+
"step": 28455
|
13289 |
+
},
|
13290 |
+
{
|
13291 |
+
"epoch": 0.055441094567279046,
|
13292 |
+
"grad_norm": 2.9506921768188477,
|
13293 |
+
"learning_rate": 2.977305197181188e-05,
|
13294 |
+
"loss": 1.9286,
|
13295 |
+
"step": 28470
|
13296 |
+
},
|
13297 |
+
{
|
13298 |
+
"epoch": 0.05547030483838931,
|
13299 |
+
"grad_norm": 3.381622791290283,
|
13300 |
+
"learning_rate": 2.9772813370086956e-05,
|
13301 |
+
"loss": 2.0169,
|
13302 |
+
"step": 28485
|
13303 |
+
},
|
13304 |
+
{
|
13305 |
+
"epoch": 0.05549951510949957,
|
13306 |
+
"grad_norm": 3.6618142127990723,
|
13307 |
+
"learning_rate": 2.9772574643958095e-05,
|
13308 |
+
"loss": 1.8318,
|
13309 |
+
"step": 28500
|
13310 |
+
},
|
13311 |
+
{
|
13312 |
+
"epoch": 0.05552872538060983,
|
13313 |
+
"grad_norm": 2.906064033508301,
|
13314 |
+
"learning_rate": 2.9772335793427304e-05,
|
13315 |
+
"loss": 1.8778,
|
13316 |
+
"step": 28515
|
13317 |
+
},
|
13318 |
+
{
|
13319 |
+
"epoch": 0.0555579356517201,
|
13320 |
+
"grad_norm": 1.9280356168746948,
|
13321 |
+
"learning_rate": 2.9772096818496592e-05,
|
13322 |
+
"loss": 1.9095,
|
13323 |
+
"step": 28530
|
13324 |
+
},
|
13325 |
+
{
|
13326 |
+
"epoch": 0.05558714592283036,
|
13327 |
+
"grad_norm": 2.451441764831543,
|
13328 |
+
"learning_rate": 2.977185771916798e-05,
|
13329 |
+
"loss": 1.7975,
|
13330 |
+
"step": 28545
|
13331 |
+
},
|
13332 |
+
{
|
13333 |
+
"epoch": 0.05561635619394062,
|
13334 |
+
"grad_norm": 3.4363293647766113,
|
13335 |
+
"learning_rate": 2.9771618495443473e-05,
|
13336 |
+
"loss": 1.9995,
|
13337 |
+
"step": 28560
|
13338 |
+
},
|
13339 |
+
{
|
13340 |
+
"epoch": 0.05564556646505089,
|
13341 |
+
"grad_norm": 3.402430772781372,
|
13342 |
+
"learning_rate": 2.9771379147325095e-05,
|
13343 |
+
"loss": 1.879,
|
13344 |
+
"step": 28575
|
13345 |
+
},
|
13346 |
+
{
|
13347 |
+
"epoch": 0.05567477673616115,
|
13348 |
+
"grad_norm": 3.7891762256622314,
|
13349 |
+
"learning_rate": 2.977113967481485e-05,
|
13350 |
+
"loss": 1.8275,
|
13351 |
+
"step": 28590
|
13352 |
+
},
|
13353 |
+
{
|
13354 |
+
"epoch": 0.05570398700727141,
|
13355 |
+
"grad_norm": 2.45809268951416,
|
13356 |
+
"learning_rate": 2.977090007791476e-05,
|
13357 |
+
"loss": 1.8131,
|
13358 |
+
"step": 28605
|
13359 |
+
},
|
13360 |
+
{
|
13361 |
+
"epoch": 0.05573319727838167,
|
13362 |
+
"grad_norm": 3.5447278022766113,
|
13363 |
+
"learning_rate": 2.9770660356626848e-05,
|
13364 |
+
"loss": 1.8373,
|
13365 |
+
"step": 28620
|
13366 |
+
},
|
13367 |
+
{
|
13368 |
+
"epoch": 0.05576240754949194,
|
13369 |
+
"grad_norm": 3.397735595703125,
|
13370 |
+
"learning_rate": 2.9770420510953124e-05,
|
13371 |
+
"loss": 1.7907,
|
13372 |
+
"step": 28635
|
13373 |
+
},
|
13374 |
+
{
|
13375 |
+
"epoch": 0.0557916178206022,
|
13376 |
+
"grad_norm": 2.185011386871338,
|
13377 |
+
"learning_rate": 2.9770180540895613e-05,
|
13378 |
+
"loss": 1.8909,
|
13379 |
+
"step": 28650
|
13380 |
+
},
|
13381 |
+
{
|
13382 |
+
"epoch": 0.05582082809171246,
|
13383 |
+
"grad_norm": 3.66780161857605,
|
13384 |
+
"learning_rate": 2.9769940446456332e-05,
|
13385 |
+
"loss": 1.8898,
|
13386 |
+
"step": 28665
|
13387 |
+
},
|
13388 |
+
{
|
13389 |
+
"epoch": 0.05585003836282273,
|
13390 |
+
"grad_norm": 3.8541507720947266,
|
13391 |
+
"learning_rate": 2.9769700227637307e-05,
|
13392 |
+
"loss": 1.8156,
|
13393 |
+
"step": 28680
|
13394 |
+
},
|
13395 |
+
{
|
13396 |
+
"epoch": 0.05587924863393299,
|
13397 |
+
"grad_norm": 2.504997491836548,
|
13398 |
+
"learning_rate": 2.9769459884440563e-05,
|
13399 |
+
"loss": 1.8584,
|
13400 |
+
"step": 28695
|
13401 |
+
},
|
13402 |
+
{
|
13403 |
+
"epoch": 0.05590845890504325,
|
13404 |
+
"grad_norm": 2.811286687850952,
|
13405 |
+
"learning_rate": 2.9769219416868114e-05,
|
13406 |
+
"loss": 1.8762,
|
13407 |
+
"step": 28710
|
13408 |
+
},
|
13409 |
+
{
|
13410 |
+
"epoch": 0.05593766917615351,
|
13411 |
+
"grad_norm": 3.2774808406829834,
|
13412 |
+
"learning_rate": 2.976897882492199e-05,
|
13413 |
+
"loss": 1.8637,
|
13414 |
+
"step": 28725
|
13415 |
+
},
|
13416 |
+
{
|
13417 |
+
"epoch": 0.05596687944726378,
|
13418 |
+
"grad_norm": 3.6415061950683594,
|
13419 |
+
"learning_rate": 2.9768738108604222e-05,
|
13420 |
+
"loss": 2.033,
|
13421 |
+
"step": 28740
|
13422 |
+
},
|
13423 |
+
{
|
13424 |
+
"epoch": 0.05599608971837404,
|
13425 |
+
"grad_norm": 2.5112550258636475,
|
13426 |
+
"learning_rate": 2.9768497267916833e-05,
|
13427 |
+
"loss": 1.9643,
|
13428 |
+
"step": 28755
|
13429 |
+
},
|
13430 |
+
{
|
13431 |
+
"epoch": 0.0560252999894843,
|
13432 |
+
"grad_norm": 3.3538918495178223,
|
13433 |
+
"learning_rate": 2.9768256302861852e-05,
|
13434 |
+
"loss": 1.7964,
|
13435 |
+
"step": 28770
|
13436 |
+
},
|
13437 |
+
{
|
13438 |
+
"epoch": 0.05605451026059457,
|
13439 |
+
"grad_norm": 1.7511709928512573,
|
13440 |
+
"learning_rate": 2.9768015213441306e-05,
|
13441 |
+
"loss": 1.9891,
|
13442 |
+
"step": 28785
|
13443 |
+
},
|
13444 |
+
{
|
13445 |
+
"epoch": 0.05608372053170483,
|
13446 |
+
"grad_norm": 4.007176399230957,
|
13447 |
+
"learning_rate": 2.9767773999657225e-05,
|
13448 |
+
"loss": 2.0263,
|
13449 |
+
"step": 28800
|
13450 |
+
},
|
13451 |
+
{
|
13452 |
+
"epoch": 0.05611293080281509,
|
13453 |
+
"grad_norm": 2.968200206756592,
|
13454 |
+
"learning_rate": 2.9767532661511644e-05,
|
13455 |
+
"loss": 1.7171,
|
13456 |
+
"step": 28815
|
13457 |
+
},
|
13458 |
+
{
|
13459 |
+
"epoch": 0.05614214107392535,
|
13460 |
+
"grad_norm": 3.2194066047668457,
|
13461 |
+
"learning_rate": 2.9767291199006594e-05,
|
13462 |
+
"loss": 1.8825,
|
13463 |
+
"step": 28830
|
13464 |
+
},
|
13465 |
+
{
|
13466 |
+
"epoch": 0.05617135134503562,
|
13467 |
+
"grad_norm": 3.994147539138794,
|
13468 |
+
"learning_rate": 2.976704961214411e-05,
|
13469 |
+
"loss": 1.6859,
|
13470 |
+
"step": 28845
|
13471 |
+
},
|
13472 |
+
{
|
13473 |
+
"epoch": 0.05620056161614588,
|
13474 |
+
"grad_norm": 2.1359071731567383,
|
13475 |
+
"learning_rate": 2.976680790092622e-05,
|
13476 |
+
"loss": 1.7796,
|
13477 |
+
"step": 28860
|
13478 |
+
},
|
13479 |
+
{
|
13480 |
+
"epoch": 0.05622977188725614,
|
13481 |
+
"grad_norm": 3.074885606765747,
|
13482 |
+
"learning_rate": 2.976656606535497e-05,
|
13483 |
+
"loss": 1.9507,
|
13484 |
+
"step": 28875
|
13485 |
+
},
|
13486 |
+
{
|
13487 |
+
"epoch": 0.05625898215836641,
|
13488 |
+
"grad_norm": 3.184913396835327,
|
13489 |
+
"learning_rate": 2.9766324105432385e-05,
|
13490 |
+
"loss": 1.9536,
|
13491 |
+
"step": 28890
|
13492 |
+
},
|
13493 |
+
{
|
13494 |
+
"epoch": 0.05628819242947667,
|
13495 |
+
"grad_norm": 3.911243438720703,
|
13496 |
+
"learning_rate": 2.976608202116051e-05,
|
13497 |
+
"loss": 1.8609,
|
13498 |
+
"step": 28905
|
13499 |
+
},
|
13500 |
+
{
|
13501 |
+
"epoch": 0.05631740270058693,
|
13502 |
+
"grad_norm": 4.364305019378662,
|
13503 |
+
"learning_rate": 2.9765839812541378e-05,
|
13504 |
+
"loss": 1.8573,
|
13505 |
+
"step": 28920
|
13506 |
+
},
|
13507 |
+
{
|
13508 |
+
"epoch": 0.05634661297169719,
|
13509 |
+
"grad_norm": 3.0088484287261963,
|
13510 |
+
"learning_rate": 2.9765597479577034e-05,
|
13511 |
+
"loss": 2.0879,
|
13512 |
+
"step": 28935
|
13513 |
+
},
|
13514 |
+
{
|
13515 |
+
"epoch": 0.05637582324280746,
|
13516 |
+
"grad_norm": 1.9867300987243652,
|
13517 |
+
"learning_rate": 2.9765355022269518e-05,
|
13518 |
+
"loss": 1.7799,
|
13519 |
+
"step": 28950
|
13520 |
+
},
|
13521 |
+
{
|
13522 |
+
"epoch": 0.05640503351391772,
|
13523 |
+
"grad_norm": 2.1443519592285156,
|
13524 |
+
"learning_rate": 2.9765112440620874e-05,
|
13525 |
+
"loss": 1.7201,
|
13526 |
+
"step": 28965
|
13527 |
+
},
|
13528 |
+
{
|
13529 |
+
"epoch": 0.05643424378502798,
|
13530 |
+
"grad_norm": 2.226407051086426,
|
13531 |
+
"learning_rate": 2.9764869734633134e-05,
|
13532 |
+
"loss": 1.7974,
|
13533 |
+
"step": 28980
|
13534 |
+
},
|
13535 |
+
{
|
13536 |
+
"epoch": 0.05646345405613824,
|
13537 |
+
"grad_norm": 4.08579158782959,
|
13538 |
+
"learning_rate": 2.9764626904308354e-05,
|
13539 |
+
"loss": 1.8633,
|
13540 |
+
"step": 28995
|
13541 |
+
},
|
13542 |
+
{
|
13543 |
+
"epoch": 0.05649266432724851,
|
13544 |
+
"grad_norm": 4.314965724945068,
|
13545 |
+
"learning_rate": 2.9764383949648576e-05,
|
13546 |
+
"loss": 1.9242,
|
13547 |
+
"step": 29010
|
13548 |
+
},
|
13549 |
+
{
|
13550 |
+
"epoch": 0.05652187459835877,
|
13551 |
+
"grad_norm": 4.151242733001709,
|
13552 |
+
"learning_rate": 2.976414087065584e-05,
|
13553 |
+
"loss": 1.942,
|
13554 |
+
"step": 29025
|
13555 |
+
},
|
13556 |
+
{
|
13557 |
+
"epoch": 0.05655108486946903,
|
13558 |
+
"grad_norm": 1.9677321910858154,
|
13559 |
+
"learning_rate": 2.97638976673322e-05,
|
13560 |
+
"loss": 1.8371,
|
13561 |
+
"step": 29040
|
13562 |
+
},
|
13563 |
+
{
|
13564 |
+
"epoch": 0.0565802951405793,
|
13565 |
+
"grad_norm": 2.814545154571533,
|
13566 |
+
"learning_rate": 2.97636543396797e-05,
|
13567 |
+
"loss": 1.9929,
|
13568 |
+
"step": 29055
|
13569 |
+
},
|
13570 |
+
{
|
13571 |
+
"epoch": 0.05660950541168956,
|
13572 |
+
"grad_norm": 3.4638845920562744,
|
13573 |
+
"learning_rate": 2.976341088770039e-05,
|
13574 |
+
"loss": 1.8635,
|
13575 |
+
"step": 29070
|
13576 |
+
},
|
13577 |
+
{
|
13578 |
+
"epoch": 0.05663871568279982,
|
13579 |
+
"grad_norm": 2.0748291015625,
|
13580 |
+
"learning_rate": 2.976316731139632e-05,
|
13581 |
+
"loss": 1.6827,
|
13582 |
+
"step": 29085
|
13583 |
+
},
|
13584 |
+
{
|
13585 |
+
"epoch": 0.056667925953910084,
|
13586 |
+
"grad_norm": 2.4861667156219482,
|
13587 |
+
"learning_rate": 2.9762923610769545e-05,
|
13588 |
+
"loss": 1.8022,
|
13589 |
+
"step": 29100
|
13590 |
+
},
|
13591 |
+
{
|
13592 |
+
"epoch": 0.05669713622502035,
|
13593 |
+
"grad_norm": 2.12156343460083,
|
13594 |
+
"learning_rate": 2.9762679785822113e-05,
|
13595 |
+
"loss": 1.8912,
|
13596 |
+
"step": 29115
|
13597 |
+
},
|
13598 |
+
{
|
13599 |
+
"epoch": 0.05672634649613061,
|
13600 |
+
"grad_norm": 2.3051788806915283,
|
13601 |
+
"learning_rate": 2.9762435836556075e-05,
|
13602 |
+
"loss": 1.786,
|
13603 |
+
"step": 29130
|
13604 |
+
},
|
13605 |
+
{
|
13606 |
+
"epoch": 0.056755556767240874,
|
13607 |
+
"grad_norm": 3.0045323371887207,
|
13608 |
+
"learning_rate": 2.9762191762973492e-05,
|
13609 |
+
"loss": 1.9105,
|
13610 |
+
"step": 29145
|
13611 |
+
},
|
13612 |
+
{
|
13613 |
+
"epoch": 0.05678476703835114,
|
13614 |
+
"grad_norm": 2.9411308765411377,
|
13615 |
+
"learning_rate": 2.9761947565076413e-05,
|
13616 |
+
"loss": 1.7949,
|
13617 |
+
"step": 29160
|
13618 |
+
},
|
13619 |
+
{
|
13620 |
+
"epoch": 0.0568139773094614,
|
13621 |
+
"grad_norm": 3.0440449714660645,
|
13622 |
+
"learning_rate": 2.97617032428669e-05,
|
13623 |
+
"loss": 2.0024,
|
13624 |
+
"step": 29175
|
13625 |
+
},
|
13626 |
+
{
|
13627 |
+
"epoch": 0.056843187580571664,
|
13628 |
+
"grad_norm": 3.836582660675049,
|
13629 |
+
"learning_rate": 2.976145879634701e-05,
|
13630 |
+
"loss": 1.9192,
|
13631 |
+
"step": 29190
|
13632 |
+
},
|
13633 |
+
{
|
13634 |
+
"epoch": 0.056872397851681925,
|
13635 |
+
"grad_norm": 3.63226580619812,
|
13636 |
+
"learning_rate": 2.9761214225518792e-05,
|
13637 |
+
"loss": 1.6728,
|
13638 |
+
"step": 29205
|
13639 |
+
},
|
13640 |
+
{
|
13641 |
+
"epoch": 0.05690160812279219,
|
13642 |
+
"grad_norm": 2.588270902633667,
|
13643 |
+
"learning_rate": 2.9760969530384317e-05,
|
13644 |
+
"loss": 1.8828,
|
13645 |
+
"step": 29220
|
13646 |
+
},
|
13647 |
+
{
|
13648 |
+
"epoch": 0.05693081839390245,
|
13649 |
+
"grad_norm": 3.4929933547973633,
|
13650 |
+
"learning_rate": 2.9760724710945642e-05,
|
13651 |
+
"loss": 2.0755,
|
13652 |
+
"step": 29235
|
13653 |
+
},
|
13654 |
+
{
|
13655 |
+
"epoch": 0.056960028665012714,
|
13656 |
+
"grad_norm": 2.916142463684082,
|
13657 |
+
"learning_rate": 2.976047976720483e-05,
|
13658 |
+
"loss": 1.8955,
|
13659 |
+
"step": 29250
|
13660 |
+
},
|
13661 |
+
{
|
13662 |
+
"epoch": 0.05698923893612298,
|
13663 |
+
"grad_norm": 3.606926918029785,
|
13664 |
+
"learning_rate": 2.976023469916394e-05,
|
13665 |
+
"loss": 1.9196,
|
13666 |
+
"step": 29265
|
13667 |
+
},
|
13668 |
+
{
|
13669 |
+
"epoch": 0.05701844920723324,
|
13670 |
+
"grad_norm": 4.033385276794434,
|
13671 |
+
"learning_rate": 2.9759989506825033e-05,
|
13672 |
+
"loss": 1.9555,
|
13673 |
+
"step": 29280
|
13674 |
+
},
|
13675 |
+
{
|
13676 |
+
"epoch": 0.057047659478343504,
|
13677 |
+
"grad_norm": 2.1779022216796875,
|
13678 |
+
"learning_rate": 2.9759744190190185e-05,
|
13679 |
+
"loss": 1.7735,
|
13680 |
+
"step": 29295
|
13681 |
+
},
|
13682 |
+
{
|
13683 |
+
"epoch": 0.057076869749453765,
|
13684 |
+
"grad_norm": 4.592677116394043,
|
13685 |
+
"learning_rate": 2.9759498749261452e-05,
|
13686 |
+
"loss": 1.8971,
|
13687 |
+
"step": 29310
|
13688 |
+
},
|
13689 |
+
{
|
13690 |
+
"epoch": 0.05710608002056403,
|
13691 |
+
"grad_norm": 4.797886371612549,
|
13692 |
+
"learning_rate": 2.9759253184040906e-05,
|
13693 |
+
"loss": 1.8494,
|
13694 |
+
"step": 29325
|
13695 |
+
},
|
13696 |
+
{
|
13697 |
+
"epoch": 0.057135290291674294,
|
13698 |
+
"grad_norm": 2.316049337387085,
|
13699 |
+
"learning_rate": 2.9759007494530615e-05,
|
13700 |
+
"loss": 1.7288,
|
13701 |
+
"step": 29340
|
13702 |
+
},
|
13703 |
+
{
|
13704 |
+
"epoch": 0.057164500562784555,
|
13705 |
+
"grad_norm": 2.9396653175354004,
|
13706 |
+
"learning_rate": 2.975876168073264e-05,
|
13707 |
+
"loss": 1.8751,
|
13708 |
+
"step": 29355
|
13709 |
+
},
|
13710 |
+
{
|
13711 |
+
"epoch": 0.05719371083389482,
|
13712 |
+
"grad_norm": 4.224997043609619,
|
13713 |
+
"learning_rate": 2.9758515742649063e-05,
|
13714 |
+
"loss": 1.78,
|
13715 |
+
"step": 29370
|
13716 |
+
},
|
13717 |
+
{
|
13718 |
+
"epoch": 0.057222921105005084,
|
13719 |
+
"grad_norm": 2.5549795627593994,
|
13720 |
+
"learning_rate": 2.9758269680281946e-05,
|
13721 |
+
"loss": 1.7375,
|
13722 |
+
"step": 29385
|
13723 |
+
},
|
13724 |
+
{
|
13725 |
+
"epoch": 0.057252131376115345,
|
13726 |
+
"grad_norm": 3.9945852756500244,
|
13727 |
+
"learning_rate": 2.9758023493633365e-05,
|
13728 |
+
"loss": 1.7078,
|
13729 |
+
"step": 29400
|
13730 |
+
},
|
13731 |
+
{
|
13732 |
+
"epoch": 0.057281341647225606,
|
13733 |
+
"grad_norm": 3.5278730392456055,
|
13734 |
+
"learning_rate": 2.975777718270539e-05,
|
13735 |
+
"loss": 1.7572,
|
13736 |
+
"step": 29415
|
13737 |
+
},
|
13738 |
+
{
|
13739 |
+
"epoch": 0.057310551918335874,
|
13740 |
+
"grad_norm": 3.9674766063690186,
|
13741 |
+
"learning_rate": 2.9757530747500104e-05,
|
13742 |
+
"loss": 1.9125,
|
13743 |
+
"step": 29430
|
13744 |
+
},
|
13745 |
+
{
|
13746 |
+
"epoch": 0.057339762189446135,
|
13747 |
+
"grad_norm": 3.498309373855591,
|
13748 |
+
"learning_rate": 2.9757284188019573e-05,
|
13749 |
+
"loss": 1.8242,
|
13750 |
+
"step": 29445
|
13751 |
+
},
|
13752 |
+
{
|
13753 |
+
"epoch": 0.057368972460556396,
|
13754 |
+
"grad_norm": 3.2913856506347656,
|
13755 |
+
"learning_rate": 2.9757037504265874e-05,
|
13756 |
+
"loss": 1.8559,
|
13757 |
+
"step": 29460
|
13758 |
+
},
|
13759 |
+
{
|
13760 |
+
"epoch": 0.05739818273166666,
|
13761 |
+
"grad_norm": 2.72259521484375,
|
13762 |
+
"learning_rate": 2.9756790696241088e-05,
|
13763 |
+
"loss": 1.7855,
|
13764 |
+
"step": 29475
|
13765 |
+
},
|
13766 |
+
{
|
13767 |
+
"epoch": 0.057427393002776925,
|
13768 |
+
"grad_norm": 7.3963704109191895,
|
13769 |
+
"learning_rate": 2.9756543763947292e-05,
|
13770 |
+
"loss": 1.9272,
|
13771 |
+
"step": 29490
|
13772 |
+
},
|
13773 |
+
{
|
13774 |
+
"epoch": 0.057456603273887186,
|
13775 |
+
"grad_norm": 2.901502847671509,
|
13776 |
+
"learning_rate": 2.9756296707386566e-05,
|
13777 |
+
"loss": 1.7294,
|
13778 |
+
"step": 29505
|
13779 |
+
},
|
13780 |
+
{
|
13781 |
+
"epoch": 0.05748581354499745,
|
13782 |
+
"grad_norm": 1.9630255699157715,
|
13783 |
+
"learning_rate": 2.9756049526560995e-05,
|
13784 |
+
"loss": 1.8477,
|
13785 |
+
"step": 29520
|
13786 |
+
},
|
13787 |
+
{
|
13788 |
+
"epoch": 0.057515023816107715,
|
13789 |
+
"grad_norm": 2.449836492538452,
|
13790 |
+
"learning_rate": 2.975580222147265e-05,
|
13791 |
+
"loss": 1.9282,
|
13792 |
+
"step": 29535
|
13793 |
+
},
|
13794 |
+
{
|
13795 |
+
"epoch": 0.057544234087217976,
|
13796 |
+
"grad_norm": 2.7461347579956055,
|
13797 |
+
"learning_rate": 2.9755554792123617e-05,
|
13798 |
+
"loss": 1.8883,
|
13799 |
+
"step": 29550
|
13800 |
+
},
|
13801 |
+
{
|
13802 |
+
"epoch": 0.05757344435832824,
|
13803 |
+
"grad_norm": 3.3567450046539307,
|
13804 |
+
"learning_rate": 2.9755307238515986e-05,
|
13805 |
+
"loss": 1.9717,
|
13806 |
+
"step": 29565
|
13807 |
+
},
|
13808 |
+
{
|
13809 |
+
"epoch": 0.0576026546294385,
|
13810 |
+
"grad_norm": 3.6341872215270996,
|
13811 |
+
"learning_rate": 2.975505956065184e-05,
|
13812 |
+
"loss": 1.9378,
|
13813 |
+
"step": 29580
|
13814 |
+
},
|
13815 |
+
{
|
13816 |
+
"epoch": 0.057631864900548765,
|
13817 |
+
"grad_norm": 4.170873165130615,
|
13818 |
+
"learning_rate": 2.9754811758533253e-05,
|
13819 |
+
"loss": 1.9077,
|
13820 |
+
"step": 29595
|
13821 |
+
},
|
13822 |
+
{
|
13823 |
+
"epoch": 0.057661075171659026,
|
13824 |
+
"grad_norm": 1.9185329675674438,
|
13825 |
+
"learning_rate": 2.975456383216233e-05,
|
13826 |
+
"loss": 1.7987,
|
13827 |
+
"step": 29610
|
13828 |
+
},
|
13829 |
+
{
|
13830 |
+
"epoch": 0.05769028544276929,
|
13831 |
+
"grad_norm": 2.3139781951904297,
|
13832 |
+
"learning_rate": 2.9754315781541144e-05,
|
13833 |
+
"loss": 1.9518,
|
13834 |
+
"step": 29625
|
13835 |
+
},
|
13836 |
+
{
|
13837 |
+
"epoch": 0.057719495713879555,
|
13838 |
+
"grad_norm": 3.581907033920288,
|
13839 |
+
"learning_rate": 2.9754067606671794e-05,
|
13840 |
+
"loss": 1.8729,
|
13841 |
+
"step": 29640
|
13842 |
+
},
|
13843 |
+
{
|
13844 |
+
"epoch": 0.057748705984989816,
|
13845 |
+
"grad_norm": 2.490278720855713,
|
13846 |
+
"learning_rate": 2.975381930755636e-05,
|
13847 |
+
"loss": 1.7576,
|
13848 |
+
"step": 29655
|
13849 |
+
},
|
13850 |
+
{
|
13851 |
+
"epoch": 0.05777791625610008,
|
13852 |
+
"grad_norm": 3.5740153789520264,
|
13853 |
+
"learning_rate": 2.9753570884196942e-05,
|
13854 |
+
"loss": 1.9749,
|
13855 |
+
"step": 29670
|
13856 |
+
},
|
13857 |
+
{
|
13858 |
+
"epoch": 0.05780712652721034,
|
13859 |
+
"grad_norm": 3.012131929397583,
|
13860 |
+
"learning_rate": 2.975332233659563e-05,
|
13861 |
+
"loss": 1.8704,
|
13862 |
+
"step": 29685
|
13863 |
+
},
|
13864 |
+
{
|
13865 |
+
"epoch": 0.057836336798320606,
|
13866 |
+
"grad_norm": 4.225703239440918,
|
13867 |
+
"learning_rate": 2.9753073664754514e-05,
|
13868 |
+
"loss": 1.6134,
|
13869 |
+
"step": 29700
|
13870 |
+
},
|
13871 |
+
{
|
13872 |
+
"epoch": 0.05786554706943087,
|
13873 |
+
"grad_norm": 3.840487480163574,
|
13874 |
+
"learning_rate": 2.9752824868675693e-05,
|
13875 |
+
"loss": 1.822,
|
13876 |
+
"step": 29715
|
13877 |
+
},
|
13878 |
+
{
|
13879 |
+
"epoch": 0.05789475734054113,
|
13880 |
+
"grad_norm": 2.8694260120391846,
|
13881 |
+
"learning_rate": 2.975257594836125e-05,
|
13882 |
+
"loss": 1.939,
|
13883 |
+
"step": 29730
|
13884 |
+
},
|
13885 |
+
{
|
13886 |
+
"epoch": 0.057923967611651396,
|
13887 |
+
"grad_norm": 3.4660465717315674,
|
13888 |
+
"learning_rate": 2.97523269038133e-05,
|
13889 |
+
"loss": 1.9517,
|
13890 |
+
"step": 29745
|
13891 |
+
},
|
13892 |
+
{
|
13893 |
+
"epoch": 0.05795317788276166,
|
13894 |
+
"grad_norm": 3.125666379928589,
|
13895 |
+
"learning_rate": 2.9752077735033924e-05,
|
13896 |
+
"loss": 1.7034,
|
13897 |
+
"step": 29760
|
13898 |
+
},
|
13899 |
+
{
|
13900 |
+
"epoch": 0.05798238815387192,
|
13901 |
+
"grad_norm": 4.895527362823486,
|
13902 |
+
"learning_rate": 2.975182844202523e-05,
|
13903 |
+
"loss": 1.8021,
|
13904 |
+
"step": 29775
|
13905 |
+
},
|
13906 |
+
{
|
13907 |
+
"epoch": 0.05801159842498218,
|
13908 |
+
"grad_norm": 3.4364778995513916,
|
13909 |
+
"learning_rate": 2.9751579024789314e-05,
|
13910 |
+
"loss": 1.9357,
|
13911 |
+
"step": 29790
|
13912 |
+
},
|
13913 |
+
{
|
13914 |
+
"epoch": 0.05804080869609245,
|
13915 |
+
"grad_norm": 3.209791421890259,
|
13916 |
+
"learning_rate": 2.9751329483328276e-05,
|
13917 |
+
"loss": 1.8969,
|
13918 |
+
"step": 29805
|
13919 |
+
},
|
13920 |
+
{
|
13921 |
+
"epoch": 0.05807001896720271,
|
13922 |
+
"grad_norm": 2.851810932159424,
|
13923 |
+
"learning_rate": 2.9751079817644217e-05,
|
13924 |
+
"loss": 1.8274,
|
13925 |
+
"step": 29820
|
13926 |
+
},
|
13927 |
+
{
|
13928 |
+
"epoch": 0.05809922923831297,
|
13929 |
+
"grad_norm": 1.9028266668319702,
|
13930 |
+
"learning_rate": 2.975083002773924e-05,
|
13931 |
+
"loss": 1.7498,
|
13932 |
+
"step": 29835
|
13933 |
+
},
|
13934 |
+
{
|
13935 |
+
"epoch": 0.05812843950942324,
|
13936 |
+
"grad_norm": 4.442659378051758,
|
13937 |
+
"learning_rate": 2.9750580113615448e-05,
|
13938 |
+
"loss": 1.906,
|
13939 |
+
"step": 29850
|
13940 |
+
},
|
13941 |
+
{
|
13942 |
+
"epoch": 0.0581576497805335,
|
13943 |
+
"grad_norm": 4.513755798339844,
|
13944 |
+
"learning_rate": 2.9750330075274948e-05,
|
13945 |
+
"loss": 1.7855,
|
13946 |
+
"step": 29865
|
13947 |
+
},
|
13948 |
+
{
|
13949 |
+
"epoch": 0.05818686005164376,
|
13950 |
+
"grad_norm": 2.4421234130859375,
|
13951 |
+
"learning_rate": 2.975007991271984e-05,
|
13952 |
+
"loss": 1.8355,
|
13953 |
+
"step": 29880
|
13954 |
+
},
|
13955 |
+
{
|
13956 |
+
"epoch": 0.05821607032275402,
|
13957 |
+
"grad_norm": 2.0383033752441406,
|
13958 |
+
"learning_rate": 2.974982962595224e-05,
|
13959 |
+
"loss": 1.8456,
|
13960 |
+
"step": 29895
|
13961 |
+
},
|
13962 |
+
{
|
13963 |
+
"epoch": 0.05824528059386429,
|
13964 |
+
"grad_norm": 2.9908933639526367,
|
13965 |
+
"learning_rate": 2.9749579214974245e-05,
|
13966 |
+
"loss": 1.7223,
|
13967 |
+
"step": 29910
|
13968 |
+
},
|
13969 |
+
{
|
13970 |
+
"epoch": 0.05827449086497455,
|
13971 |
+
"grad_norm": 3.1746137142181396,
|
13972 |
+
"learning_rate": 2.9749328679787976e-05,
|
13973 |
+
"loss": 1.8949,
|
13974 |
+
"step": 29925
|
13975 |
+
},
|
13976 |
+
{
|
13977 |
+
"epoch": 0.05830370113608481,
|
13978 |
+
"grad_norm": 3.351712942123413,
|
13979 |
+
"learning_rate": 2.9749078020395526e-05,
|
13980 |
+
"loss": 1.8423,
|
13981 |
+
"step": 29940
|
13982 |
+
},
|
13983 |
+
{
|
13984 |
+
"epoch": 0.05833291140719508,
|
13985 |
+
"grad_norm": 4.363383769989014,
|
13986 |
+
"learning_rate": 2.9748827236799024e-05,
|
13987 |
+
"loss": 2.0718,
|
13988 |
+
"step": 29955
|
13989 |
+
},
|
13990 |
+
{
|
13991 |
+
"epoch": 0.05836212167830534,
|
13992 |
+
"grad_norm": 3.5311388969421387,
|
13993 |
+
"learning_rate": 2.974857632900057e-05,
|
13994 |
+
"loss": 1.8393,
|
13995 |
+
"step": 29970
|
13996 |
+
},
|
13997 |
+
{
|
13998 |
+
"epoch": 0.0583913319494156,
|
13999 |
+
"grad_norm": 2.739178419113159,
|
14000 |
+
"learning_rate": 2.974832529700228e-05,
|
14001 |
+
"loss": 1.901,
|
14002 |
+
"step": 29985
|
14003 |
+
},
|
14004 |
+
{
|
14005 |
+
"epoch": 0.05842054222052586,
|
14006 |
+
"grad_norm": 5.376321315765381,
|
14007 |
+
"learning_rate": 2.974807414080627e-05,
|
14008 |
+
"loss": 1.9742,
|
14009 |
+
"step": 30000
|
14010 |
}
|
14011 |
],
|
14012 |
"logging_steps": 15,
|
|
|
14026 |
"attributes": {}
|
14027 |
}
|
14028 |
},
|
14029 |
+
"total_flos": 4.741878296671027e+16,
|
14030 |
"train_batch_size": 4,
|
14031 |
"trial_name": null,
|
14032 |
"trial_params": null
|
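For reference, the log_history entries appended above follow the standard Hugging Face Transformers TrainerState layout: one record every "logging_steps" interval (15 steps here), each carrying epoch, grad_norm, learning_rate, loss, and step. Below is a minimal Python sketch of how one might inspect the curve from a local clone of this repo; the relative file path and the use of matplotlib are assumptions, not part of the commit.

import json

import matplotlib.pyplot as plt  # assumption: matplotlib is available locally

# Load the trainer state uploaded in this commit (path assumes a local clone).
with open("data/trainer_state.json") as f:
    state = json.load(f)

# Keep only records that carry a training loss (evaluation records would not).
logs = [entry for entry in state["log_history"] if "loss" in entry]
steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]

print(f"{len(logs)} logged records; last: step {steps[-1]}, "
      f"epoch {logs[-1]['epoch']:.5f}, loss {losses[-1]}")

# Plot the raw training-loss curve, logged every 15 steps.
plt.plot(steps, losses, linewidth=0.8)
plt.xlabel("global step")
plt.ylabel("training loss")
plt.show()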