Training in progress, step 6214, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140880624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:294d261cda5ced74edd1d4c98c51cee9a61117ef6dd5f02b8134b6528effeb90
|
3 |
size 1140880624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2281891834
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ff5ee676f925a2181707ff1fd85a4b1de7eef1a04ca23f5ffa62cf1a93e7d67
|
3 |
size 2281891834
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4853c64c7ec9ed6b1547fa5a6e7d3e434268dca3c243cd388dc441364d2a3694
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:333a9a1890a497931376258e58c4cbb393bb12fc883542acc2d5b199df5d4780
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -41832,6 +41832,1679 @@
|
|
41832 |
"learning_rate": 8.6607118570576e-05,
|
41833 |
"loss": 0.1316,
|
41834 |
"step": 5975
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41835 |
}
|
41836 |
],
|
41837 |
"logging_steps": 1,
|
@@ -41851,7 +43524,7 @@
|
|
41851 |
"attributes": {}
|
41852 |
}
|
41853 |
},
|
41854 |
-
"total_flos": 3.
|
41855 |
"train_batch_size": 4,
|
41856 |
"trial_name": null,
|
41857 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.25125597662923166,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 6214,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
41832 |
"learning_rate": 8.6607118570576e-05,
|
41833 |
"loss": 0.1316,
|
41834 |
"step": 5975
|
41835 |
+
},
|
41836 |
+
{
|
41837 |
+
"epoch": 0.24163271907568207,
|
41838 |
+
"grad_norm": 2.853416919708252,
|
41839 |
+
"learning_rate": 8.660277435465594e-05,
|
41840 |
+
"loss": 0.1694,
|
41841 |
+
"step": 5976
|
41842 |
+
},
|
41843 |
+
{
|
41844 |
+
"epoch": 0.24167315293094907,
|
41845 |
+
"grad_norm": 5.313234329223633,
|
41846 |
+
"learning_rate": 8.659842954327992e-05,
|
41847 |
+
"loss": 0.2186,
|
41848 |
+
"step": 5977
|
41849 |
+
},
|
41850 |
+
{
|
41851 |
+
"epoch": 0.2417135867862161,
|
41852 |
+
"grad_norm": 2.550760507583618,
|
41853 |
+
"learning_rate": 8.659408413651861e-05,
|
41854 |
+
"loss": 0.094,
|
41855 |
+
"step": 5978
|
41856 |
+
},
|
41857 |
+
{
|
41858 |
+
"epoch": 0.2417540206414831,
|
41859 |
+
"grad_norm": 3.766831874847412,
|
41860 |
+
"learning_rate": 8.65897381344427e-05,
|
41861 |
+
"loss": 0.2396,
|
41862 |
+
"step": 5979
|
41863 |
+
},
|
41864 |
+
{
|
41865 |
+
"epoch": 0.24179445449675013,
|
41866 |
+
"grad_norm": 7.09255313873291,
|
41867 |
+
"learning_rate": 8.658539153712286e-05,
|
41868 |
+
"loss": 0.2572,
|
41869 |
+
"step": 5980
|
41870 |
+
},
|
41871 |
+
{
|
41872 |
+
"epoch": 0.24183488835201714,
|
41873 |
+
"grad_norm": 6.072479724884033,
|
41874 |
+
"learning_rate": 8.658104434462982e-05,
|
41875 |
+
"loss": 0.1831,
|
41876 |
+
"step": 5981
|
41877 |
+
},
|
41878 |
+
{
|
41879 |
+
"epoch": 0.24187532220728417,
|
41880 |
+
"grad_norm": 4.733118057250977,
|
41881 |
+
"learning_rate": 8.657669655703433e-05,
|
41882 |
+
"loss": 0.1759,
|
41883 |
+
"step": 5982
|
41884 |
+
},
|
41885 |
+
{
|
41886 |
+
"epoch": 0.24191575606255117,
|
41887 |
+
"grad_norm": 5.013708114624023,
|
41888 |
+
"learning_rate": 8.657234817440708e-05,
|
41889 |
+
"loss": 0.2601,
|
41890 |
+
"step": 5983
|
41891 |
+
},
|
41892 |
+
{
|
41893 |
+
"epoch": 0.2419561899178182,
|
41894 |
+
"grad_norm": 4.62626314163208,
|
41895 |
+
"learning_rate": 8.656799919681883e-05,
|
41896 |
+
"loss": 0.2134,
|
41897 |
+
"step": 5984
|
41898 |
+
},
|
41899 |
+
{
|
41900 |
+
"epoch": 0.2419966237730852,
|
41901 |
+
"grad_norm": 3.8375024795532227,
|
41902 |
+
"learning_rate": 8.656364962434033e-05,
|
41903 |
+
"loss": 0.301,
|
41904 |
+
"step": 5985
|
41905 |
+
},
|
41906 |
+
{
|
41907 |
+
"epoch": 0.24203705762835223,
|
41908 |
+
"grad_norm": 2.5020620822906494,
|
41909 |
+
"learning_rate": 8.655929945704233e-05,
|
41910 |
+
"loss": 0.1129,
|
41911 |
+
"step": 5986
|
41912 |
+
},
|
41913 |
+
{
|
41914 |
+
"epoch": 0.24207749148361923,
|
41915 |
+
"grad_norm": 3.215634822845459,
|
41916 |
+
"learning_rate": 8.655494869499558e-05,
|
41917 |
+
"loss": 0.2104,
|
41918 |
+
"step": 5987
|
41919 |
+
},
|
41920 |
+
{
|
41921 |
+
"epoch": 0.24211792533888624,
|
41922 |
+
"grad_norm": 3.1493115425109863,
|
41923 |
+
"learning_rate": 8.65505973382709e-05,
|
41924 |
+
"loss": 0.1872,
|
41925 |
+
"step": 5988
|
41926 |
+
},
|
41927 |
+
{
|
41928 |
+
"epoch": 0.24215835919415327,
|
41929 |
+
"grad_norm": 3.9308154582977295,
|
41930 |
+
"learning_rate": 8.654624538693907e-05,
|
41931 |
+
"loss": 0.2378,
|
41932 |
+
"step": 5989
|
41933 |
+
},
|
41934 |
+
{
|
41935 |
+
"epoch": 0.24219879304942027,
|
41936 |
+
"grad_norm": 2.025744915008545,
|
41937 |
+
"learning_rate": 8.654189284107088e-05,
|
41938 |
+
"loss": 0.1456,
|
41939 |
+
"step": 5990
|
41940 |
+
},
|
41941 |
+
{
|
41942 |
+
"epoch": 0.2422392269046873,
|
41943 |
+
"grad_norm": 3.391645908355713,
|
41944 |
+
"learning_rate": 8.65375397007371e-05,
|
41945 |
+
"loss": 0.1188,
|
41946 |
+
"step": 5991
|
41947 |
+
},
|
41948 |
+
{
|
41949 |
+
"epoch": 0.2422796607599543,
|
41950 |
+
"grad_norm": 7.289383411407471,
|
41951 |
+
"learning_rate": 8.653318596600862e-05,
|
41952 |
+
"loss": 0.208,
|
41953 |
+
"step": 5992
|
41954 |
+
},
|
41955 |
+
{
|
41956 |
+
"epoch": 0.24232009461522133,
|
41957 |
+
"grad_norm": 3.654409408569336,
|
41958 |
+
"learning_rate": 8.65288316369562e-05,
|
41959 |
+
"loss": 0.1813,
|
41960 |
+
"step": 5993
|
41961 |
+
},
|
41962 |
+
{
|
41963 |
+
"epoch": 0.24236052847048833,
|
41964 |
+
"grad_norm": 4.555336952209473,
|
41965 |
+
"learning_rate": 8.652447671365072e-05,
|
41966 |
+
"loss": 0.1197,
|
41967 |
+
"step": 5994
|
41968 |
+
},
|
41969 |
+
{
|
41970 |
+
"epoch": 0.24240096232575536,
|
41971 |
+
"grad_norm": 4.378109931945801,
|
41972 |
+
"learning_rate": 8.6520121196163e-05,
|
41973 |
+
"loss": 0.1918,
|
41974 |
+
"step": 5995
|
41975 |
+
},
|
41976 |
+
{
|
41977 |
+
"epoch": 0.24244139618102237,
|
41978 |
+
"grad_norm": 1.2791835069656372,
|
41979 |
+
"learning_rate": 8.651576508456392e-05,
|
41980 |
+
"loss": 0.0739,
|
41981 |
+
"step": 5996
|
41982 |
+
},
|
41983 |
+
{
|
41984 |
+
"epoch": 0.2424818300362894,
|
41985 |
+
"grad_norm": 3.9173526763916016,
|
41986 |
+
"learning_rate": 8.651140837892432e-05,
|
41987 |
+
"loss": 0.1931,
|
41988 |
+
"step": 5997
|
41989 |
+
},
|
41990 |
+
{
|
41991 |
+
"epoch": 0.2425222638915564,
|
41992 |
+
"grad_norm": 4.513161659240723,
|
41993 |
+
"learning_rate": 8.65070510793151e-05,
|
41994 |
+
"loss": 0.0788,
|
41995 |
+
"step": 5998
|
41996 |
+
},
|
41997 |
+
{
|
41998 |
+
"epoch": 0.2425626977468234,
|
41999 |
+
"grad_norm": 7.073320388793945,
|
42000 |
+
"learning_rate": 8.65026931858071e-05,
|
42001 |
+
"loss": 0.2172,
|
42002 |
+
"step": 5999
|
42003 |
+
},
|
42004 |
+
{
|
42005 |
+
"epoch": 0.24260313160209043,
|
42006 |
+
"grad_norm": 3.8800010681152344,
|
42007 |
+
"learning_rate": 8.649833469847129e-05,
|
42008 |
+
"loss": 0.1563,
|
42009 |
+
"step": 6000
|
42010 |
+
},
|
42011 |
+
{
|
42012 |
+
"epoch": 0.24264356545735744,
|
42013 |
+
"grad_norm": 2.8107495307922363,
|
42014 |
+
"learning_rate": 8.649397561737851e-05,
|
42015 |
+
"loss": 0.2392,
|
42016 |
+
"step": 6001
|
42017 |
+
},
|
42018 |
+
{
|
42019 |
+
"epoch": 0.24268399931262447,
|
42020 |
+
"grad_norm": 8.04423713684082,
|
42021 |
+
"learning_rate": 8.648961594259968e-05,
|
42022 |
+
"loss": 0.2228,
|
42023 |
+
"step": 6002
|
42024 |
+
},
|
42025 |
+
{
|
42026 |
+
"epoch": 0.24272443316789147,
|
42027 |
+
"grad_norm": 6.598309516906738,
|
42028 |
+
"learning_rate": 8.648525567420576e-05,
|
42029 |
+
"loss": 0.1908,
|
42030 |
+
"step": 6003
|
42031 |
+
},
|
42032 |
+
{
|
42033 |
+
"epoch": 0.2427648670231585,
|
42034 |
+
"grad_norm": 3.7928972244262695,
|
42035 |
+
"learning_rate": 8.648089481226764e-05,
|
42036 |
+
"loss": 0.2214,
|
42037 |
+
"step": 6004
|
42038 |
+
},
|
42039 |
+
{
|
42040 |
+
"epoch": 0.2428053008784255,
|
42041 |
+
"grad_norm": 1.895093321800232,
|
42042 |
+
"learning_rate": 8.647653335685631e-05,
|
42043 |
+
"loss": 0.1524,
|
42044 |
+
"step": 6005
|
42045 |
+
},
|
42046 |
+
{
|
42047 |
+
"epoch": 0.24284573473369253,
|
42048 |
+
"grad_norm": 4.698232650756836,
|
42049 |
+
"learning_rate": 8.647217130804268e-05,
|
42050 |
+
"loss": 0.1546,
|
42051 |
+
"step": 6006
|
42052 |
+
},
|
42053 |
+
{
|
42054 |
+
"epoch": 0.24288616858895953,
|
42055 |
+
"grad_norm": 3.976858615875244,
|
42056 |
+
"learning_rate": 8.646780866589772e-05,
|
42057 |
+
"loss": 0.2591,
|
42058 |
+
"step": 6007
|
42059 |
+
},
|
42060 |
+
{
|
42061 |
+
"epoch": 0.24292660244422656,
|
42062 |
+
"grad_norm": 5.355378150939941,
|
42063 |
+
"learning_rate": 8.646344543049243e-05,
|
42064 |
+
"loss": 0.2391,
|
42065 |
+
"step": 6008
|
42066 |
+
},
|
42067 |
+
{
|
42068 |
+
"epoch": 0.24296703629949357,
|
42069 |
+
"grad_norm": 3.624940872192383,
|
42070 |
+
"learning_rate": 8.645908160189774e-05,
|
42071 |
+
"loss": 0.1035,
|
42072 |
+
"step": 6009
|
42073 |
+
},
|
42074 |
+
{
|
42075 |
+
"epoch": 0.24300747015476057,
|
42076 |
+
"grad_norm": 7.696066379547119,
|
42077 |
+
"learning_rate": 8.64547171801847e-05,
|
42078 |
+
"loss": 0.1876,
|
42079 |
+
"step": 6010
|
42080 |
+
},
|
42081 |
+
{
|
42082 |
+
"epoch": 0.2430479040100276,
|
42083 |
+
"grad_norm": 4.4118452072143555,
|
42084 |
+
"learning_rate": 8.645035216542428e-05,
|
42085 |
+
"loss": 0.1505,
|
42086 |
+
"step": 6011
|
42087 |
+
},
|
42088 |
+
{
|
42089 |
+
"epoch": 0.2430883378652946,
|
42090 |
+
"grad_norm": 9.357704162597656,
|
42091 |
+
"learning_rate": 8.64459865576875e-05,
|
42092 |
+
"loss": 0.2712,
|
42093 |
+
"step": 6012
|
42094 |
+
},
|
42095 |
+
{
|
42096 |
+
"epoch": 0.24312877172056163,
|
42097 |
+
"grad_norm": 2.713459014892578,
|
42098 |
+
"learning_rate": 8.644162035704535e-05,
|
42099 |
+
"loss": 0.1985,
|
42100 |
+
"step": 6013
|
42101 |
+
},
|
42102 |
+
{
|
42103 |
+
"epoch": 0.24316920557582863,
|
42104 |
+
"grad_norm": 5.679785251617432,
|
42105 |
+
"learning_rate": 8.643725356356889e-05,
|
42106 |
+
"loss": 0.1645,
|
42107 |
+
"step": 6014
|
42108 |
+
},
|
42109 |
+
{
|
42110 |
+
"epoch": 0.24320963943109566,
|
42111 |
+
"grad_norm": 7.06746244430542,
|
42112 |
+
"learning_rate": 8.643288617732917e-05,
|
42113 |
+
"loss": 0.2948,
|
42114 |
+
"step": 6015
|
42115 |
+
},
|
42116 |
+
{
|
42117 |
+
"epoch": 0.24325007328636267,
|
42118 |
+
"grad_norm": 3.614020347595215,
|
42119 |
+
"learning_rate": 8.642851819839719e-05,
|
42120 |
+
"loss": 0.2815,
|
42121 |
+
"step": 6016
|
42122 |
+
},
|
42123 |
+
{
|
42124 |
+
"epoch": 0.2432905071416297,
|
42125 |
+
"grad_norm": 8.398319244384766,
|
42126 |
+
"learning_rate": 8.642414962684406e-05,
|
42127 |
+
"loss": 0.1573,
|
42128 |
+
"step": 6017
|
42129 |
+
},
|
42130 |
+
{
|
42131 |
+
"epoch": 0.2433309409968967,
|
42132 |
+
"grad_norm": 5.846600532531738,
|
42133 |
+
"learning_rate": 8.641978046274082e-05,
|
42134 |
+
"loss": 0.1309,
|
42135 |
+
"step": 6018
|
42136 |
+
},
|
42137 |
+
{
|
42138 |
+
"epoch": 0.24337137485216373,
|
42139 |
+
"grad_norm": 2.964691400527954,
|
42140 |
+
"learning_rate": 8.641541070615855e-05,
|
42141 |
+
"loss": 0.0609,
|
42142 |
+
"step": 6019
|
42143 |
+
},
|
42144 |
+
{
|
42145 |
+
"epoch": 0.24341180870743073,
|
42146 |
+
"grad_norm": 4.7332444190979,
|
42147 |
+
"learning_rate": 8.641104035716836e-05,
|
42148 |
+
"loss": 0.2204,
|
42149 |
+
"step": 6020
|
42150 |
+
},
|
42151 |
+
{
|
42152 |
+
"epoch": 0.24345224256269773,
|
42153 |
+
"grad_norm": 3.9071009159088135,
|
42154 |
+
"learning_rate": 8.640666941584132e-05,
|
42155 |
+
"loss": 0.1772,
|
42156 |
+
"step": 6021
|
42157 |
+
},
|
42158 |
+
{
|
42159 |
+
"epoch": 0.24349267641796477,
|
42160 |
+
"grad_norm": 4.52919864654541,
|
42161 |
+
"learning_rate": 8.640229788224853e-05,
|
42162 |
+
"loss": 0.1845,
|
42163 |
+
"step": 6022
|
42164 |
+
},
|
42165 |
+
{
|
42166 |
+
"epoch": 0.24353311027323177,
|
42167 |
+
"grad_norm": 6.53327751159668,
|
42168 |
+
"learning_rate": 8.639792575646115e-05,
|
42169 |
+
"loss": 0.1407,
|
42170 |
+
"step": 6023
|
42171 |
+
},
|
42172 |
+
{
|
42173 |
+
"epoch": 0.2435735441284988,
|
42174 |
+
"grad_norm": 4.435870170593262,
|
42175 |
+
"learning_rate": 8.639355303855028e-05,
|
42176 |
+
"loss": 0.0937,
|
42177 |
+
"step": 6024
|
42178 |
+
},
|
42179 |
+
{
|
42180 |
+
"epoch": 0.2436139779837658,
|
42181 |
+
"grad_norm": 6.9111480712890625,
|
42182 |
+
"learning_rate": 8.638917972858704e-05,
|
42183 |
+
"loss": 0.2429,
|
42184 |
+
"step": 6025
|
42185 |
+
},
|
42186 |
+
{
|
42187 |
+
"epoch": 0.24365441183903283,
|
42188 |
+
"grad_norm": 4.262193202972412,
|
42189 |
+
"learning_rate": 8.638480582664259e-05,
|
42190 |
+
"loss": 0.22,
|
42191 |
+
"step": 6026
|
42192 |
+
},
|
42193 |
+
{
|
42194 |
+
"epoch": 0.24369484569429983,
|
42195 |
+
"grad_norm": 3.620594024658203,
|
42196 |
+
"learning_rate": 8.638043133278809e-05,
|
42197 |
+
"loss": 0.1129,
|
42198 |
+
"step": 6027
|
42199 |
+
},
|
42200 |
+
{
|
42201 |
+
"epoch": 0.24373527954956686,
|
42202 |
+
"grad_norm": 6.571465969085693,
|
42203 |
+
"learning_rate": 8.637605624709468e-05,
|
42204 |
+
"loss": 0.1726,
|
42205 |
+
"step": 6028
|
42206 |
+
},
|
42207 |
+
{
|
42208 |
+
"epoch": 0.24377571340483387,
|
42209 |
+
"grad_norm": 2.8471567630767822,
|
42210 |
+
"learning_rate": 8.637168056963357e-05,
|
42211 |
+
"loss": 0.0976,
|
42212 |
+
"step": 6029
|
42213 |
+
},
|
42214 |
+
{
|
42215 |
+
"epoch": 0.2438161472601009,
|
42216 |
+
"grad_norm": 6.479422092437744,
|
42217 |
+
"learning_rate": 8.636730430047593e-05,
|
42218 |
+
"loss": 0.2516,
|
42219 |
+
"step": 6030
|
42220 |
+
},
|
42221 |
+
{
|
42222 |
+
"epoch": 0.2438565811153679,
|
42223 |
+
"grad_norm": 2.0430047512054443,
|
42224 |
+
"learning_rate": 8.636292743969294e-05,
|
42225 |
+
"loss": 0.1564,
|
42226 |
+
"step": 6031
|
42227 |
+
},
|
42228 |
+
{
|
42229 |
+
"epoch": 0.2438970149706349,
|
42230 |
+
"grad_norm": 3.429826498031616,
|
42231 |
+
"learning_rate": 8.635854998735582e-05,
|
42232 |
+
"loss": 0.1513,
|
42233 |
+
"step": 6032
|
42234 |
+
},
|
42235 |
+
{
|
42236 |
+
"epoch": 0.24393744882590193,
|
42237 |
+
"grad_norm": 2.052880048751831,
|
42238 |
+
"learning_rate": 8.635417194353578e-05,
|
42239 |
+
"loss": 0.0597,
|
42240 |
+
"step": 6033
|
42241 |
+
},
|
42242 |
+
{
|
42243 |
+
"epoch": 0.24397788268116893,
|
42244 |
+
"grad_norm": 8.362695693969727,
|
42245 |
+
"learning_rate": 8.634979330830404e-05,
|
42246 |
+
"loss": 0.2508,
|
42247 |
+
"step": 6034
|
42248 |
+
},
|
42249 |
+
{
|
42250 |
+
"epoch": 0.24401831653643596,
|
42251 |
+
"grad_norm": 2.54459285736084,
|
42252 |
+
"learning_rate": 8.634541408173182e-05,
|
42253 |
+
"loss": 0.0596,
|
42254 |
+
"step": 6035
|
42255 |
+
},
|
42256 |
+
{
|
42257 |
+
"epoch": 0.24405875039170297,
|
42258 |
+
"grad_norm": 4.136692523956299,
|
42259 |
+
"learning_rate": 8.634103426389037e-05,
|
42260 |
+
"loss": 0.1318,
|
42261 |
+
"step": 6036
|
42262 |
+
},
|
42263 |
+
{
|
42264 |
+
"epoch": 0.24409918424697,
|
42265 |
+
"grad_norm": 10.016950607299805,
|
42266 |
+
"learning_rate": 8.633665385485094e-05,
|
42267 |
+
"loss": 0.2157,
|
42268 |
+
"step": 6037
|
42269 |
+
},
|
42270 |
+
{
|
42271 |
+
"epoch": 0.244139618102237,
|
42272 |
+
"grad_norm": 4.006833076477051,
|
42273 |
+
"learning_rate": 8.633227285468481e-05,
|
42274 |
+
"loss": 0.232,
|
42275 |
+
"step": 6038
|
42276 |
+
},
|
42277 |
+
{
|
42278 |
+
"epoch": 0.24418005195750403,
|
42279 |
+
"grad_norm": 3.8136184215545654,
|
42280 |
+
"learning_rate": 8.632789126346322e-05,
|
42281 |
+
"loss": 0.0961,
|
42282 |
+
"step": 6039
|
42283 |
+
},
|
42284 |
+
{
|
42285 |
+
"epoch": 0.24422048581277103,
|
42286 |
+
"grad_norm": 7.684732913970947,
|
42287 |
+
"learning_rate": 8.632350908125748e-05,
|
42288 |
+
"loss": 0.1847,
|
42289 |
+
"step": 6040
|
42290 |
+
},
|
42291 |
+
{
|
42292 |
+
"epoch": 0.24426091966803803,
|
42293 |
+
"grad_norm": 3.978747606277466,
|
42294 |
+
"learning_rate": 8.631912630813885e-05,
|
42295 |
+
"loss": 0.3372,
|
42296 |
+
"step": 6041
|
42297 |
+
},
|
42298 |
+
{
|
42299 |
+
"epoch": 0.24430135352330506,
|
42300 |
+
"grad_norm": 5.78717565536499,
|
42301 |
+
"learning_rate": 8.631474294417864e-05,
|
42302 |
+
"loss": 0.1573,
|
42303 |
+
"step": 6042
|
42304 |
+
},
|
42305 |
+
{
|
42306 |
+
"epoch": 0.24434178737857207,
|
42307 |
+
"grad_norm": 3.336500406265259,
|
42308 |
+
"learning_rate": 8.631035898944817e-05,
|
42309 |
+
"loss": 0.1248,
|
42310 |
+
"step": 6043
|
42311 |
+
},
|
42312 |
+
{
|
42313 |
+
"epoch": 0.2443822212338391,
|
42314 |
+
"grad_norm": 3.878305673599243,
|
42315 |
+
"learning_rate": 8.630597444401873e-05,
|
42316 |
+
"loss": 0.1764,
|
42317 |
+
"step": 6044
|
42318 |
+
},
|
42319 |
+
{
|
42320 |
+
"epoch": 0.2444226550891061,
|
42321 |
+
"grad_norm": 3.5070252418518066,
|
42322 |
+
"learning_rate": 8.63015893079617e-05,
|
42323 |
+
"loss": 0.1417,
|
42324 |
+
"step": 6045
|
42325 |
+
},
|
42326 |
+
{
|
42327 |
+
"epoch": 0.24446308894437313,
|
42328 |
+
"grad_norm": 9.929906845092773,
|
42329 |
+
"learning_rate": 8.629720358134836e-05,
|
42330 |
+
"loss": 0.2483,
|
42331 |
+
"step": 6046
|
42332 |
+
},
|
42333 |
+
{
|
42334 |
+
"epoch": 0.24450352279964013,
|
42335 |
+
"grad_norm": 2.988154888153076,
|
42336 |
+
"learning_rate": 8.62928172642501e-05,
|
42337 |
+
"loss": 0.2212,
|
42338 |
+
"step": 6047
|
42339 |
+
},
|
42340 |
+
{
|
42341 |
+
"epoch": 0.24454395665490716,
|
42342 |
+
"grad_norm": 2.689389944076538,
|
42343 |
+
"learning_rate": 8.628843035673824e-05,
|
42344 |
+
"loss": 0.0792,
|
42345 |
+
"step": 6048
|
42346 |
+
},
|
42347 |
+
{
|
42348 |
+
"epoch": 0.24458439051017417,
|
42349 |
+
"grad_norm": 12.93316650390625,
|
42350 |
+
"learning_rate": 8.628404285888418e-05,
|
42351 |
+
"loss": 0.2807,
|
42352 |
+
"step": 6049
|
42353 |
+
},
|
42354 |
+
{
|
42355 |
+
"epoch": 0.2446248243654412,
|
42356 |
+
"grad_norm": 9.077486038208008,
|
42357 |
+
"learning_rate": 8.627965477075928e-05,
|
42358 |
+
"loss": 0.3347,
|
42359 |
+
"step": 6050
|
42360 |
+
},
|
42361 |
+
{
|
42362 |
+
"epoch": 0.2446652582207082,
|
42363 |
+
"grad_norm": 15.951316833496094,
|
42364 |
+
"learning_rate": 8.627526609243492e-05,
|
42365 |
+
"loss": 0.3689,
|
42366 |
+
"step": 6051
|
42367 |
+
},
|
42368 |
+
{
|
42369 |
+
"epoch": 0.2447056920759752,
|
42370 |
+
"grad_norm": 2.9297943115234375,
|
42371 |
+
"learning_rate": 8.627087682398252e-05,
|
42372 |
+
"loss": 0.1827,
|
42373 |
+
"step": 6052
|
42374 |
+
},
|
42375 |
+
{
|
42376 |
+
"epoch": 0.24474612593124223,
|
42377 |
+
"grad_norm": 3.517852544784546,
|
42378 |
+
"learning_rate": 8.626648696547346e-05,
|
42379 |
+
"loss": 0.087,
|
42380 |
+
"step": 6053
|
42381 |
+
},
|
42382 |
+
{
|
42383 |
+
"epoch": 0.24478655978650923,
|
42384 |
+
"grad_norm": 11.33461856842041,
|
42385 |
+
"learning_rate": 8.626209651697916e-05,
|
42386 |
+
"loss": 0.337,
|
42387 |
+
"step": 6054
|
42388 |
+
},
|
42389 |
+
{
|
42390 |
+
"epoch": 0.24482699364177626,
|
42391 |
+
"grad_norm": 3.5571320056915283,
|
42392 |
+
"learning_rate": 8.625770547857104e-05,
|
42393 |
+
"loss": 0.1222,
|
42394 |
+
"step": 6055
|
42395 |
+
},
|
42396 |
+
{
|
42397 |
+
"epoch": 0.24486742749704327,
|
42398 |
+
"grad_norm": 3.389450788497925,
|
42399 |
+
"learning_rate": 8.625331385032056e-05,
|
42400 |
+
"loss": 0.1753,
|
42401 |
+
"step": 6056
|
42402 |
+
},
|
42403 |
+
{
|
42404 |
+
"epoch": 0.2449078613523103,
|
42405 |
+
"grad_norm": 4.702533721923828,
|
42406 |
+
"learning_rate": 8.624892163229912e-05,
|
42407 |
+
"loss": 0.1709,
|
42408 |
+
"step": 6057
|
42409 |
+
},
|
42410 |
+
{
|
42411 |
+
"epoch": 0.2449482952075773,
|
42412 |
+
"grad_norm": 6.528069972991943,
|
42413 |
+
"learning_rate": 8.624452882457821e-05,
|
42414 |
+
"loss": 0.1442,
|
42415 |
+
"step": 6058
|
42416 |
+
},
|
42417 |
+
{
|
42418 |
+
"epoch": 0.24498872906284433,
|
42419 |
+
"grad_norm": 2.3681833744049072,
|
42420 |
+
"learning_rate": 8.624013542722927e-05,
|
42421 |
+
"loss": 0.1071,
|
42422 |
+
"step": 6059
|
42423 |
+
},
|
42424 |
+
{
|
42425 |
+
"epoch": 0.24502916291811133,
|
42426 |
+
"grad_norm": 6.04899263381958,
|
42427 |
+
"learning_rate": 8.623574144032379e-05,
|
42428 |
+
"loss": 0.2262,
|
42429 |
+
"step": 6060
|
42430 |
+
},
|
42431 |
+
{
|
42432 |
+
"epoch": 0.24506959677337836,
|
42433 |
+
"grad_norm": 3.0587050914764404,
|
42434 |
+
"learning_rate": 8.623134686393324e-05,
|
42435 |
+
"loss": 0.1647,
|
42436 |
+
"step": 6061
|
42437 |
+
},
|
42438 |
+
{
|
42439 |
+
"epoch": 0.24511003062864536,
|
42440 |
+
"grad_norm": 3.833815813064575,
|
42441 |
+
"learning_rate": 8.622695169812911e-05,
|
42442 |
+
"loss": 0.1766,
|
42443 |
+
"step": 6062
|
42444 |
+
},
|
42445 |
+
{
|
42446 |
+
"epoch": 0.24515046448391237,
|
42447 |
+
"grad_norm": 5.266330718994141,
|
42448 |
+
"learning_rate": 8.622255594298291e-05,
|
42449 |
+
"loss": 0.2156,
|
42450 |
+
"step": 6063
|
42451 |
+
},
|
42452 |
+
{
|
42453 |
+
"epoch": 0.2451908983391794,
|
42454 |
+
"grad_norm": 3.1624538898468018,
|
42455 |
+
"learning_rate": 8.621815959856614e-05,
|
42456 |
+
"loss": 0.1147,
|
42457 |
+
"step": 6064
|
42458 |
+
},
|
42459 |
+
{
|
42460 |
+
"epoch": 0.2452313321944464,
|
42461 |
+
"grad_norm": 7.124099254608154,
|
42462 |
+
"learning_rate": 8.621376266495031e-05,
|
42463 |
+
"loss": 0.274,
|
42464 |
+
"step": 6065
|
42465 |
+
},
|
42466 |
+
{
|
42467 |
+
"epoch": 0.24527176604971343,
|
42468 |
+
"grad_norm": 2.109743118286133,
|
42469 |
+
"learning_rate": 8.620936514220699e-05,
|
42470 |
+
"loss": 0.0597,
|
42471 |
+
"step": 6066
|
42472 |
+
},
|
42473 |
+
{
|
42474 |
+
"epoch": 0.24531219990498043,
|
42475 |
+
"grad_norm": 5.317684650421143,
|
42476 |
+
"learning_rate": 8.620496703040769e-05,
|
42477 |
+
"loss": 0.2001,
|
42478 |
+
"step": 6067
|
42479 |
+
},
|
42480 |
+
{
|
42481 |
+
"epoch": 0.24535263376024746,
|
42482 |
+
"grad_norm": 4.574336528778076,
|
42483 |
+
"learning_rate": 8.620056832962394e-05,
|
42484 |
+
"loss": 0.1516,
|
42485 |
+
"step": 6068
|
42486 |
+
},
|
42487 |
+
{
|
42488 |
+
"epoch": 0.24539306761551447,
|
42489 |
+
"grad_norm": 4.148901462554932,
|
42490 |
+
"learning_rate": 8.619616903992733e-05,
|
42491 |
+
"loss": 0.0779,
|
42492 |
+
"step": 6069
|
42493 |
+
},
|
42494 |
+
{
|
42495 |
+
"epoch": 0.2454335014707815,
|
42496 |
+
"grad_norm": 5.344276428222656,
|
42497 |
+
"learning_rate": 8.619176916138944e-05,
|
42498 |
+
"loss": 0.1638,
|
42499 |
+
"step": 6070
|
42500 |
+
},
|
42501 |
+
{
|
42502 |
+
"epoch": 0.2454739353260485,
|
42503 |
+
"grad_norm": 4.519772529602051,
|
42504 |
+
"learning_rate": 8.61873686940818e-05,
|
42505 |
+
"loss": 0.2012,
|
42506 |
+
"step": 6071
|
42507 |
+
},
|
42508 |
+
{
|
42509 |
+
"epoch": 0.24551436918131553,
|
42510 |
+
"grad_norm": 4.1637115478515625,
|
42511 |
+
"learning_rate": 8.618296763807603e-05,
|
42512 |
+
"loss": 0.1288,
|
42513 |
+
"step": 6072
|
42514 |
+
},
|
42515 |
+
{
|
42516 |
+
"epoch": 0.24555480303658253,
|
42517 |
+
"grad_norm": 8.29826545715332,
|
42518 |
+
"learning_rate": 8.617856599344372e-05,
|
42519 |
+
"loss": 0.2354,
|
42520 |
+
"step": 6073
|
42521 |
+
},
|
42522 |
+
{
|
42523 |
+
"epoch": 0.24559523689184953,
|
42524 |
+
"grad_norm": 6.626035213470459,
|
42525 |
+
"learning_rate": 8.617416376025649e-05,
|
42526 |
+
"loss": 0.1596,
|
42527 |
+
"step": 6074
|
42528 |
+
},
|
42529 |
+
{
|
42530 |
+
"epoch": 0.24563567074711656,
|
42531 |
+
"grad_norm": 5.0871477127075195,
|
42532 |
+
"learning_rate": 8.616976093858591e-05,
|
42533 |
+
"loss": 0.1753,
|
42534 |
+
"step": 6075
|
42535 |
+
},
|
42536 |
+
{
|
42537 |
+
"epoch": 0.24567610460238357,
|
42538 |
+
"grad_norm": 4.304733753204346,
|
42539 |
+
"learning_rate": 8.616535752850366e-05,
|
42540 |
+
"loss": 0.2885,
|
42541 |
+
"step": 6076
|
42542 |
+
},
|
42543 |
+
{
|
42544 |
+
"epoch": 0.2457165384576506,
|
42545 |
+
"grad_norm": 3.2352781295776367,
|
42546 |
+
"learning_rate": 8.616095353008135e-05,
|
42547 |
+
"loss": 0.1896,
|
42548 |
+
"step": 6077
|
42549 |
+
},
|
42550 |
+
{
|
42551 |
+
"epoch": 0.2457569723129176,
|
42552 |
+
"grad_norm": 2.7523484230041504,
|
42553 |
+
"learning_rate": 8.615654894339063e-05,
|
42554 |
+
"loss": 0.1781,
|
42555 |
+
"step": 6078
|
42556 |
+
},
|
42557 |
+
{
|
42558 |
+
"epoch": 0.24579740616818463,
|
42559 |
+
"grad_norm": 3.727778673171997,
|
42560 |
+
"learning_rate": 8.615214376850314e-05,
|
42561 |
+
"loss": 0.1103,
|
42562 |
+
"step": 6079
|
42563 |
+
},
|
42564 |
+
{
|
42565 |
+
"epoch": 0.24583784002345163,
|
42566 |
+
"grad_norm": 3.9669647216796875,
|
42567 |
+
"learning_rate": 8.614773800549055e-05,
|
42568 |
+
"loss": 0.0879,
|
42569 |
+
"step": 6080
|
42570 |
+
},
|
42571 |
+
{
|
42572 |
+
"epoch": 0.24587827387871866,
|
42573 |
+
"grad_norm": 6.558189392089844,
|
42574 |
+
"learning_rate": 8.614333165442454e-05,
|
42575 |
+
"loss": 0.1847,
|
42576 |
+
"step": 6081
|
42577 |
+
},
|
42578 |
+
{
|
42579 |
+
"epoch": 0.24591870773398566,
|
42580 |
+
"grad_norm": 6.306224346160889,
|
42581 |
+
"learning_rate": 8.61389247153768e-05,
|
42582 |
+
"loss": 0.1495,
|
42583 |
+
"step": 6082
|
42584 |
+
},
|
42585 |
+
{
|
42586 |
+
"epoch": 0.2459591415892527,
|
42587 |
+
"grad_norm": 3.4565958976745605,
|
42588 |
+
"learning_rate": 8.6134517188419e-05,
|
42589 |
+
"loss": 0.0916,
|
42590 |
+
"step": 6083
|
42591 |
+
},
|
42592 |
+
{
|
42593 |
+
"epoch": 0.2459995754445197,
|
42594 |
+
"grad_norm": 4.450873851776123,
|
42595 |
+
"learning_rate": 8.613010907362287e-05,
|
42596 |
+
"loss": 0.1449,
|
42597 |
+
"step": 6084
|
42598 |
+
},
|
42599 |
+
{
|
42600 |
+
"epoch": 0.2460400092997867,
|
42601 |
+
"grad_norm": 6.050655364990234,
|
42602 |
+
"learning_rate": 8.612570037106009e-05,
|
42603 |
+
"loss": 0.2411,
|
42604 |
+
"step": 6085
|
42605 |
+
},
|
42606 |
+
{
|
42607 |
+
"epoch": 0.24608044315505373,
|
42608 |
+
"grad_norm": 4.706817150115967,
|
42609 |
+
"learning_rate": 8.61212910808024e-05,
|
42610 |
+
"loss": 0.0666,
|
42611 |
+
"step": 6086
|
42612 |
+
},
|
42613 |
+
{
|
42614 |
+
"epoch": 0.24612087701032073,
|
42615 |
+
"grad_norm": 2.352297067642212,
|
42616 |
+
"learning_rate": 8.611688120292155e-05,
|
42617 |
+
"loss": 0.1034,
|
42618 |
+
"step": 6087
|
42619 |
+
},
|
42620 |
+
{
|
42621 |
+
"epoch": 0.24616131086558776,
|
42622 |
+
"grad_norm": 4.81610107421875,
|
42623 |
+
"learning_rate": 8.611247073748922e-05,
|
42624 |
+
"loss": 0.0981,
|
42625 |
+
"step": 6088
|
42626 |
+
},
|
42627 |
+
{
|
42628 |
+
"epoch": 0.24620174472085476,
|
42629 |
+
"grad_norm": 8.78715991973877,
|
42630 |
+
"learning_rate": 8.610805968457721e-05,
|
42631 |
+
"loss": 0.211,
|
42632 |
+
"step": 6089
|
42633 |
+
},
|
42634 |
+
{
|
42635 |
+
"epoch": 0.2462421785761218,
|
42636 |
+
"grad_norm": 6.008256912231445,
|
42637 |
+
"learning_rate": 8.610364804425728e-05,
|
42638 |
+
"loss": 0.0955,
|
42639 |
+
"step": 6090
|
42640 |
+
},
|
42641 |
+
{
|
42642 |
+
"epoch": 0.2462826124313888,
|
42643 |
+
"grad_norm": 4.646786212921143,
|
42644 |
+
"learning_rate": 8.609923581660118e-05,
|
42645 |
+
"loss": 0.2153,
|
42646 |
+
"step": 6091
|
42647 |
+
},
|
42648 |
+
{
|
42649 |
+
"epoch": 0.24632304628665583,
|
42650 |
+
"grad_norm": 1.5845496654510498,
|
42651 |
+
"learning_rate": 8.609482300168067e-05,
|
42652 |
+
"loss": 0.0434,
|
42653 |
+
"step": 6092
|
42654 |
+
},
|
42655 |
+
{
|
42656 |
+
"epoch": 0.24636348014192283,
|
42657 |
+
"grad_norm": 2.4028897285461426,
|
42658 |
+
"learning_rate": 8.609040959956759e-05,
|
42659 |
+
"loss": 0.0609,
|
42660 |
+
"step": 6093
|
42661 |
+
},
|
42662 |
+
{
|
42663 |
+
"epoch": 0.24640391399718986,
|
42664 |
+
"grad_norm": 36.975887298583984,
|
42665 |
+
"learning_rate": 8.608599561033369e-05,
|
42666 |
+
"loss": 0.1087,
|
42667 |
+
"step": 6094
|
42668 |
+
},
|
42669 |
+
{
|
42670 |
+
"epoch": 0.24644434785245686,
|
42671 |
+
"grad_norm": 7.599268913269043,
|
42672 |
+
"learning_rate": 8.60815810340508e-05,
|
42673 |
+
"loss": 0.2089,
|
42674 |
+
"step": 6095
|
42675 |
+
},
|
42676 |
+
{
|
42677 |
+
"epoch": 0.24648478170772387,
|
42678 |
+
"grad_norm": 2.8600575923919678,
|
42679 |
+
"learning_rate": 8.607716587079074e-05,
|
42680 |
+
"loss": 0.0742,
|
42681 |
+
"step": 6096
|
42682 |
+
},
|
42683 |
+
{
|
42684 |
+
"epoch": 0.2465252155629909,
|
42685 |
+
"grad_norm": 2.9021806716918945,
|
42686 |
+
"learning_rate": 8.607275012062532e-05,
|
42687 |
+
"loss": 0.0799,
|
42688 |
+
"step": 6097
|
42689 |
+
},
|
42690 |
+
{
|
42691 |
+
"epoch": 0.2465656494182579,
|
42692 |
+
"grad_norm": 4.6386332511901855,
|
42693 |
+
"learning_rate": 8.606833378362639e-05,
|
42694 |
+
"loss": 0.1038,
|
42695 |
+
"step": 6098
|
42696 |
+
},
|
42697 |
+
{
|
42698 |
+
"epoch": 0.24660608327352493,
|
42699 |
+
"grad_norm": 5.46524715423584,
|
42700 |
+
"learning_rate": 8.606391685986579e-05,
|
42701 |
+
"loss": 0.1079,
|
42702 |
+
"step": 6099
|
42703 |
+
},
|
42704 |
+
{
|
42705 |
+
"epoch": 0.24664651712879193,
|
42706 |
+
"grad_norm": 4.315202236175537,
|
42707 |
+
"learning_rate": 8.605949934941537e-05,
|
42708 |
+
"loss": 0.1017,
|
42709 |
+
"step": 6100
|
42710 |
+
},
|
42711 |
+
{
|
42712 |
+
"epoch": 0.24668695098405896,
|
42713 |
+
"grad_norm": 4.34230375289917,
|
42714 |
+
"learning_rate": 8.6055081252347e-05,
|
42715 |
+
"loss": 0.135,
|
42716 |
+
"step": 6101
|
42717 |
+
},
|
42718 |
+
{
|
42719 |
+
"epoch": 0.24672738483932596,
|
42720 |
+
"grad_norm": 10.488526344299316,
|
42721 |
+
"learning_rate": 8.605066256873256e-05,
|
42722 |
+
"loss": 0.2443,
|
42723 |
+
"step": 6102
|
42724 |
+
},
|
42725 |
+
{
|
42726 |
+
"epoch": 0.246767818694593,
|
42727 |
+
"grad_norm": 4.642398357391357,
|
42728 |
+
"learning_rate": 8.604624329864392e-05,
|
42729 |
+
"loss": 0.1518,
|
42730 |
+
"step": 6103
|
42731 |
+
},
|
42732 |
+
{
|
42733 |
+
"epoch": 0.24680825254986,
|
42734 |
+
"grad_norm": 3.78501296043396,
|
42735 |
+
"learning_rate": 8.604182344215298e-05,
|
42736 |
+
"loss": 0.0736,
|
42737 |
+
"step": 6104
|
42738 |
+
},
|
42739 |
+
{
|
42740 |
+
"epoch": 0.246848686405127,
|
42741 |
+
"grad_norm": 9.167596817016602,
|
42742 |
+
"learning_rate": 8.603740299933164e-05,
|
42743 |
+
"loss": 0.3418,
|
42744 |
+
"step": 6105
|
42745 |
+
},
|
42746 |
+
{
|
42747 |
+
"epoch": 0.24688912026039403,
|
42748 |
+
"grad_norm": 7.805789947509766,
|
42749 |
+
"learning_rate": 8.603298197025181e-05,
|
42750 |
+
"loss": 0.4001,
|
42751 |
+
"step": 6106
|
42752 |
+
},
|
42753 |
+
{
|
42754 |
+
"epoch": 0.24692955411566103,
|
42755 |
+
"grad_norm": 6.4627275466918945,
|
42756 |
+
"learning_rate": 8.60285603549854e-05,
|
42757 |
+
"loss": 0.2139,
|
42758 |
+
"step": 6107
|
42759 |
+
},
|
42760 |
+
{
|
42761 |
+
"epoch": 0.24696998797092806,
|
42762 |
+
"grad_norm": 2.7034740447998047,
|
42763 |
+
"learning_rate": 8.602413815360439e-05,
|
42764 |
+
"loss": 0.0862,
|
42765 |
+
"step": 6108
|
42766 |
+
},
|
42767 |
+
{
|
42768 |
+
"epoch": 0.24701042182619506,
|
42769 |
+
"grad_norm": 3.240323066711426,
|
42770 |
+
"learning_rate": 8.601971536618067e-05,
|
42771 |
+
"loss": 0.1273,
|
42772 |
+
"step": 6109
|
42773 |
+
},
|
42774 |
+
{
|
42775 |
+
"epoch": 0.2470508556814621,
|
42776 |
+
"grad_norm": 4.7289886474609375,
|
42777 |
+
"learning_rate": 8.60152919927862e-05,
|
42778 |
+
"loss": 0.2225,
|
42779 |
+
"step": 6110
|
42780 |
+
},
|
42781 |
+
{
|
42782 |
+
"epoch": 0.2470912895367291,
|
42783 |
+
"grad_norm": 3.309140920639038,
|
42784 |
+
"learning_rate": 8.601086803349296e-05,
|
42785 |
+
"loss": 0.0905,
|
42786 |
+
"step": 6111
|
42787 |
+
},
|
42788 |
+
{
|
42789 |
+
"epoch": 0.24713172339199613,
|
42790 |
+
"grad_norm": 6.798061847686768,
|
42791 |
+
"learning_rate": 8.60064434883729e-05,
|
42792 |
+
"loss": 0.2208,
|
42793 |
+
"step": 6112
|
42794 |
+
},
|
42795 |
+
{
|
42796 |
+
"epoch": 0.24717215724726313,
|
42797 |
+
"grad_norm": 3.6816961765289307,
|
42798 |
+
"learning_rate": 8.6002018357498e-05,
|
42799 |
+
"loss": 0.1211,
|
42800 |
+
"step": 6113
|
42801 |
+
},
|
42802 |
+
{
|
42803 |
+
"epoch": 0.24721259110253016,
|
42804 |
+
"grad_norm": 3.832526683807373,
|
42805 |
+
"learning_rate": 8.599759264094025e-05,
|
42806 |
+
"loss": 0.0823,
|
42807 |
+
"step": 6114
|
42808 |
+
},
|
42809 |
+
{
|
42810 |
+
"epoch": 0.24725302495779716,
|
42811 |
+
"grad_norm": 3.5318474769592285,
|
42812 |
+
"learning_rate": 8.599316633877164e-05,
|
42813 |
+
"loss": 0.1528,
|
42814 |
+
"step": 6115
|
42815 |
+
},
|
42816 |
+
{
|
42817 |
+
"epoch": 0.24729345881306417,
|
42818 |
+
"grad_norm": 3.617408514022827,
|
42819 |
+
"learning_rate": 8.59887394510642e-05,
|
42820 |
+
"loss": 0.0787,
|
42821 |
+
"step": 6116
|
42822 |
+
},
|
42823 |
+
{
|
42824 |
+
"epoch": 0.2473338926683312,
|
42825 |
+
"grad_norm": 5.636387348175049,
|
42826 |
+
"learning_rate": 8.598431197788994e-05,
|
42827 |
+
"loss": 0.1539,
|
42828 |
+
"step": 6117
|
42829 |
+
},
|
42830 |
+
{
|
42831 |
+
"epoch": 0.2473743265235982,
|
42832 |
+
"grad_norm": 6.630018711090088,
|
42833 |
+
"learning_rate": 8.597988391932089e-05,
|
42834 |
+
"loss": 0.4171,
|
42835 |
+
"step": 6118
|
42836 |
+
},
|
42837 |
+
{
|
42838 |
+
"epoch": 0.24741476037886523,
|
42839 |
+
"grad_norm": 7.672569751739502,
|
42840 |
+
"learning_rate": 8.597545527542905e-05,
|
42841 |
+
"loss": 0.3314,
|
42842 |
+
"step": 6119
|
42843 |
+
},
|
42844 |
+
{
|
42845 |
+
"epoch": 0.24745519423413223,
|
42846 |
+
"grad_norm": 7.134288787841797,
|
42847 |
+
"learning_rate": 8.597102604628651e-05,
|
42848 |
+
"loss": 0.2681,
|
42849 |
+
"step": 6120
|
42850 |
+
},
|
42851 |
+
{
|
42852 |
+
"epoch": 0.24749562808939926,
|
42853 |
+
"grad_norm": 7.464917182922363,
|
42854 |
+
"learning_rate": 8.59665962319653e-05,
|
42855 |
+
"loss": 0.1661,
|
42856 |
+
"step": 6121
|
42857 |
+
},
|
42858 |
+
{
|
42859 |
+
"epoch": 0.24753606194466626,
|
42860 |
+
"grad_norm": 4.142001152038574,
|
42861 |
+
"learning_rate": 8.59621658325375e-05,
|
42862 |
+
"loss": 0.2082,
|
42863 |
+
"step": 6122
|
42864 |
+
},
|
42865 |
+
{
|
42866 |
+
"epoch": 0.2475764957999333,
|
42867 |
+
"grad_norm": 4.803020000457764,
|
42868 |
+
"learning_rate": 8.595773484807516e-05,
|
42869 |
+
"loss": 0.2261,
|
42870 |
+
"step": 6123
|
42871 |
+
},
|
42872 |
+
{
|
42873 |
+
"epoch": 0.2476169296552003,
|
42874 |
+
"grad_norm": 3.1889495849609375,
|
42875 |
+
"learning_rate": 8.595330327865038e-05,
|
42876 |
+
"loss": 0.141,
|
42877 |
+
"step": 6124
|
42878 |
+
},
|
42879 |
+
{
|
42880 |
+
"epoch": 0.24765736351046733,
|
42881 |
+
"grad_norm": 3.5087060928344727,
|
42882 |
+
"learning_rate": 8.594887112433528e-05,
|
42883 |
+
"loss": 0.1012,
|
42884 |
+
"step": 6125
|
42885 |
+
},
|
42886 |
+
{
|
42887 |
+
"epoch": 0.24769779736573433,
|
42888 |
+
"grad_norm": 3.0082900524139404,
|
42889 |
+
"learning_rate": 8.594443838520191e-05,
|
42890 |
+
"loss": 0.1087,
|
42891 |
+
"step": 6126
|
42892 |
+
},
|
42893 |
+
{
|
42894 |
+
"epoch": 0.24773823122100133,
|
42895 |
+
"grad_norm": 5.638223648071289,
|
42896 |
+
"learning_rate": 8.594000506132241e-05,
|
42897 |
+
"loss": 0.1215,
|
42898 |
+
"step": 6127
|
42899 |
+
},
|
42900 |
+
{
|
42901 |
+
"epoch": 0.24777866507626836,
|
42902 |
+
"grad_norm": 2.4479122161865234,
|
42903 |
+
"learning_rate": 8.59355711527689e-05,
|
42904 |
+
"loss": 0.1683,
|
42905 |
+
"step": 6128
|
42906 |
+
},
|
42907 |
+
{
|
42908 |
+
"epoch": 0.24781909893153536,
|
42909 |
+
"grad_norm": 4.197991847991943,
|
42910 |
+
"learning_rate": 8.59311366596135e-05,
|
42911 |
+
"loss": 0.093,
|
42912 |
+
"step": 6129
|
42913 |
+
},
|
42914 |
+
{
|
42915 |
+
"epoch": 0.2478595327868024,
|
42916 |
+
"grad_norm": 6.671213626861572,
|
42917 |
+
"learning_rate": 8.592670158192838e-05,
|
42918 |
+
"loss": 0.2066,
|
42919 |
+
"step": 6130
|
42920 |
+
},
|
42921 |
+
{
|
42922 |
+
"epoch": 0.2478999666420694,
|
42923 |
+
"grad_norm": 5.721414566040039,
|
42924 |
+
"learning_rate": 8.592226591978566e-05,
|
42925 |
+
"loss": 0.2359,
|
42926 |
+
"step": 6131
|
42927 |
+
},
|
42928 |
+
{
|
42929 |
+
"epoch": 0.24794040049733643,
|
42930 |
+
"grad_norm": 7.26845121383667,
|
42931 |
+
"learning_rate": 8.59178296732575e-05,
|
42932 |
+
"loss": 0.1464,
|
42933 |
+
"step": 6132
|
42934 |
+
},
|
42935 |
+
{
|
42936 |
+
"epoch": 0.24798083435260343,
|
42937 |
+
"grad_norm": 3.415755271911621,
|
42938 |
+
"learning_rate": 8.59133928424161e-05,
|
42939 |
+
"loss": 0.1107,
|
42940 |
+
"step": 6133
|
42941 |
+
},
|
42942 |
+
{
|
42943 |
+
"epoch": 0.24802126820787046,
|
42944 |
+
"grad_norm": 2.2114908695220947,
|
42945 |
+
"learning_rate": 8.59089554273336e-05,
|
42946 |
+
"loss": 0.0952,
|
42947 |
+
"step": 6134
|
42948 |
+
},
|
42949 |
+
{
|
42950 |
+
"epoch": 0.24806170206313746,
|
42951 |
+
"grad_norm": 4.224102973937988,
|
42952 |
+
"learning_rate": 8.590451742808222e-05,
|
42953 |
+
"loss": 0.1822,
|
42954 |
+
"step": 6135
|
42955 |
+
},
|
42956 |
+
{
|
42957 |
+
"epoch": 0.2481021359184045,
|
42958 |
+
"grad_norm": 5.626123428344727,
|
42959 |
+
"learning_rate": 8.590007884473413e-05,
|
42960 |
+
"loss": 0.1277,
|
42961 |
+
"step": 6136
|
42962 |
+
},
|
42963 |
+
{
|
42964 |
+
"epoch": 0.2481425697736715,
|
42965 |
+
"grad_norm": 2.7514543533325195,
|
42966 |
+
"learning_rate": 8.589563967736154e-05,
|
42967 |
+
"loss": 0.2924,
|
42968 |
+
"step": 6137
|
42969 |
+
},
|
42970 |
+
{
|
42971 |
+
"epoch": 0.2481830036289385,
|
42972 |
+
"grad_norm": 4.606257438659668,
|
42973 |
+
"learning_rate": 8.589119992603669e-05,
|
42974 |
+
"loss": 0.2058,
|
42975 |
+
"step": 6138
|
42976 |
+
},
|
42977 |
+
{
|
42978 |
+
"epoch": 0.24822343748420553,
|
42979 |
+
"grad_norm": 6.313202381134033,
|
42980 |
+
"learning_rate": 8.58867595908318e-05,
|
42981 |
+
"loss": 0.1745,
|
42982 |
+
"step": 6139
|
42983 |
+
},
|
42984 |
+
{
|
42985 |
+
"epoch": 0.24826387133947253,
|
42986 |
+
"grad_norm": 4.425397872924805,
|
42987 |
+
"learning_rate": 8.588231867181908e-05,
|
42988 |
+
"loss": 0.1273,
|
42989 |
+
"step": 6140
|
42990 |
+
},
|
42991 |
+
{
|
42992 |
+
"epoch": 0.24830430519473956,
|
42993 |
+
"grad_norm": 4.079669952392578,
|
42994 |
+
"learning_rate": 8.587787716907081e-05,
|
42995 |
+
"loss": 0.1124,
|
42996 |
+
"step": 6141
|
42997 |
+
},
|
42998 |
+
{
|
42999 |
+
"epoch": 0.24834473905000656,
|
43000 |
+
"grad_norm": 2.386849880218506,
|
43001 |
+
"learning_rate": 8.587343508265923e-05,
|
43002 |
+
"loss": 0.1633,
|
43003 |
+
"step": 6142
|
43004 |
+
},
|
43005 |
+
{
|
43006 |
+
"epoch": 0.2483851729052736,
|
43007 |
+
"grad_norm": 7.280966758728027,
|
43008 |
+
"learning_rate": 8.58689924126566e-05,
|
43009 |
+
"loss": 0.2683,
|
43010 |
+
"step": 6143
|
43011 |
+
},
|
43012 |
+
{
|
43013 |
+
"epoch": 0.2484256067605406,
|
43014 |
+
"grad_norm": 2.142650604248047,
|
43015 |
+
"learning_rate": 8.586454915913517e-05,
|
43016 |
+
"loss": 0.1698,
|
43017 |
+
"step": 6144
|
43018 |
+
},
|
43019 |
+
{
|
43020 |
+
"epoch": 0.24846604061580763,
|
43021 |
+
"grad_norm": 7.144465923309326,
|
43022 |
+
"learning_rate": 8.586010532216728e-05,
|
43023 |
+
"loss": 0.1161,
|
43024 |
+
"step": 6145
|
43025 |
+
},
|
43026 |
+
{
|
43027 |
+
"epoch": 0.24850647447107463,
|
43028 |
+
"grad_norm": 4.681589126586914,
|
43029 |
+
"learning_rate": 8.585566090182518e-05,
|
43030 |
+
"loss": 0.1186,
|
43031 |
+
"step": 6146
|
43032 |
+
},
|
43033 |
+
{
|
43034 |
+
"epoch": 0.24854690832634166,
|
43035 |
+
"grad_norm": 3.8650546073913574,
|
43036 |
+
"learning_rate": 8.58512158981812e-05,
|
43037 |
+
"loss": 0.1997,
|
43038 |
+
"step": 6147
|
43039 |
+
},
|
43040 |
+
{
|
43041 |
+
"epoch": 0.24858734218160866,
|
43042 |
+
"grad_norm": 9.318419456481934,
|
43043 |
+
"learning_rate": 8.584677031130761e-05,
|
43044 |
+
"loss": 0.2923,
|
43045 |
+
"step": 6148
|
43046 |
+
},
|
43047 |
+
{
|
43048 |
+
"epoch": 0.24862777603687566,
|
43049 |
+
"grad_norm": 4.759102821350098,
|
43050 |
+
"learning_rate": 8.584232414127678e-05,
|
43051 |
+
"loss": 0.1804,
|
43052 |
+
"step": 6149
|
43053 |
+
},
|
43054 |
+
{
|
43055 |
+
"epoch": 0.2486682098921427,
|
43056 |
+
"grad_norm": 3.2844924926757812,
|
43057 |
+
"learning_rate": 8.5837877388161e-05,
|
43058 |
+
"loss": 0.1689,
|
43059 |
+
"step": 6150
|
43060 |
+
},
|
43061 |
+
{
|
43062 |
+
"epoch": 0.2487086437474097,
|
43063 |
+
"grad_norm": 9.399534225463867,
|
43064 |
+
"learning_rate": 8.583343005203263e-05,
|
43065 |
+
"loss": 0.3574,
|
43066 |
+
"step": 6151
|
43067 |
+
},
|
43068 |
+
{
|
43069 |
+
"epoch": 0.24874907760267673,
|
43070 |
+
"grad_norm": 4.286575794219971,
|
43071 |
+
"learning_rate": 8.582898213296402e-05,
|
43072 |
+
"loss": 0.2597,
|
43073 |
+
"step": 6152
|
43074 |
+
},
|
43075 |
+
{
|
43076 |
+
"epoch": 0.24878951145794373,
|
43077 |
+
"grad_norm": 5.7185139656066895,
|
43078 |
+
"learning_rate": 8.582453363102752e-05,
|
43079 |
+
"loss": 0.2317,
|
43080 |
+
"step": 6153
|
43081 |
+
},
|
43082 |
+
{
|
43083 |
+
"epoch": 0.24882994531321076,
|
43084 |
+
"grad_norm": 5.736219882965088,
|
43085 |
+
"learning_rate": 8.58200845462955e-05,
|
43086 |
+
"loss": 0.1812,
|
43087 |
+
"step": 6154
|
43088 |
+
},
|
43089 |
+
{
|
43090 |
+
"epoch": 0.24887037916847776,
|
43091 |
+
"grad_norm": 5.837985515594482,
|
43092 |
+
"learning_rate": 8.581563487884035e-05,
|
43093 |
+
"loss": 0.1813,
|
43094 |
+
"step": 6155
|
43095 |
+
},
|
43096 |
+
{
|
43097 |
+
"epoch": 0.2489108130237448,
|
43098 |
+
"grad_norm": 5.658602237701416,
|
43099 |
+
"learning_rate": 8.581118462873446e-05,
|
43100 |
+
"loss": 0.1545,
|
43101 |
+
"step": 6156
|
43102 |
+
},
|
43103 |
+
{
|
43104 |
+
"epoch": 0.2489512468790118,
|
43105 |
+
"grad_norm": 4.969576358795166,
|
43106 |
+
"learning_rate": 8.58067337960502e-05,
|
43107 |
+
"loss": 0.1486,
|
43108 |
+
"step": 6157
|
43109 |
+
},
|
43110 |
+
{
|
43111 |
+
"epoch": 0.24899168073427883,
|
43112 |
+
"grad_norm": 2.319491386413574,
|
43113 |
+
"learning_rate": 8.580228238085999e-05,
|
43114 |
+
"loss": 0.1649,
|
43115 |
+
"step": 6158
|
43116 |
+
},
|
43117 |
+
{
|
43118 |
+
"epoch": 0.24903211458954583,
|
43119 |
+
"grad_norm": 3.767211437225342,
|
43120 |
+
"learning_rate": 8.579783038323626e-05,
|
43121 |
+
"loss": 0.1828,
|
43122 |
+
"step": 6159
|
43123 |
+
},
|
43124 |
+
{
|
43125 |
+
"epoch": 0.24907254844481283,
|
43126 |
+
"grad_norm": 3.399191379547119,
|
43127 |
+
"learning_rate": 8.579337780325142e-05,
|
43128 |
+
"loss": 0.1291,
|
43129 |
+
"step": 6160
|
43130 |
+
},
|
43131 |
+
{
|
43132 |
+
"epoch": 0.24911298230007986,
|
43133 |
+
"grad_norm": 3.8261640071868896,
|
43134 |
+
"learning_rate": 8.57889246409779e-05,
|
43135 |
+
"loss": 0.1082,
|
43136 |
+
"step": 6161
|
43137 |
+
},
|
43138 |
+
{
|
43139 |
+
"epoch": 0.24915341615534686,
|
43140 |
+
"grad_norm": 2.4140303134918213,
|
43141 |
+
"learning_rate": 8.578447089648816e-05,
|
43142 |
+
"loss": 0.1318,
|
43143 |
+
"step": 6162
|
43144 |
+
},
|
43145 |
+
{
|
43146 |
+
"epoch": 0.2491938500106139,
|
43147 |
+
"grad_norm": 7.104363918304443,
|
43148 |
+
"learning_rate": 8.578001656985466e-05,
|
43149 |
+
"loss": 0.1731,
|
43150 |
+
"step": 6163
|
43151 |
+
},
|
43152 |
+
{
|
43153 |
+
"epoch": 0.2492342838658809,
|
43154 |
+
"grad_norm": 7.832826137542725,
|
43155 |
+
"learning_rate": 8.577556166114984e-05,
|
43156 |
+
"loss": 0.299,
|
43157 |
+
"step": 6164
|
43158 |
+
},
|
43159 |
+
{
|
43160 |
+
"epoch": 0.24927471772114793,
|
43161 |
+
"grad_norm": 8.464363098144531,
|
43162 |
+
"learning_rate": 8.577110617044618e-05,
|
43163 |
+
"loss": 0.3275,
|
43164 |
+
"step": 6165
|
43165 |
+
},
|
43166 |
+
{
|
43167 |
+
"epoch": 0.24931515157641493,
|
43168 |
+
"grad_norm": 4.684128761291504,
|
43169 |
+
"learning_rate": 8.576665009781616e-05,
|
43170 |
+
"loss": 0.2326,
|
43171 |
+
"step": 6166
|
43172 |
+
},
|
43173 |
+
{
|
43174 |
+
"epoch": 0.24935558543168196,
|
43175 |
+
"grad_norm": 7.360625743865967,
|
43176 |
+
"learning_rate": 8.576219344333227e-05,
|
43177 |
+
"loss": 0.2637,
|
43178 |
+
"step": 6167
|
43179 |
+
},
|
43180 |
+
{
|
43181 |
+
"epoch": 0.24939601928694896,
|
43182 |
+
"grad_norm": 5.580816268920898,
|
43183 |
+
"learning_rate": 8.575773620706704e-05,
|
43184 |
+
"loss": 0.2078,
|
43185 |
+
"step": 6168
|
43186 |
+
},
|
43187 |
+
{
|
43188 |
+
"epoch": 0.24943645314221596,
|
43189 |
+
"grad_norm": 4.870668411254883,
|
43190 |
+
"learning_rate": 8.575327838909295e-05,
|
43191 |
+
"loss": 0.1566,
|
43192 |
+
"step": 6169
|
43193 |
+
},
|
43194 |
+
{
|
43195 |
+
"epoch": 0.249476886997483,
|
43196 |
+
"grad_norm": 3.1624975204467773,
|
43197 |
+
"learning_rate": 8.574881998948252e-05,
|
43198 |
+
"loss": 0.2021,
|
43199 |
+
"step": 6170
|
43200 |
+
},
|
43201 |
+
{
|
43202 |
+
"epoch": 0.24951732085275,
|
43203 |
+
"grad_norm": 2.021010398864746,
|
43204 |
+
"learning_rate": 8.574436100830829e-05,
|
43205 |
+
"loss": 0.0836,
|
43206 |
+
"step": 6171
|
43207 |
+
},
|
43208 |
+
{
|
43209 |
+
"epoch": 0.24955775470801703,
|
43210 |
+
"grad_norm": 2.359396457672119,
|
43211 |
+
"learning_rate": 8.573990144564281e-05,
|
43212 |
+
"loss": 0.1261,
|
43213 |
+
"step": 6172
|
43214 |
+
},
|
43215 |
+
{
|
43216 |
+
"epoch": 0.24959818856328403,
|
43217 |
+
"grad_norm": 14.166296005249023,
|
43218 |
+
"learning_rate": 8.57354413015586e-05,
|
43219 |
+
"loss": 0.3957,
|
43220 |
+
"step": 6173
|
43221 |
+
},
|
43222 |
+
{
|
43223 |
+
"epoch": 0.24963862241855106,
|
43224 |
+
"grad_norm": 4.486057281494141,
|
43225 |
+
"learning_rate": 8.573098057612822e-05,
|
43226 |
+
"loss": 0.1729,
|
43227 |
+
"step": 6174
|
43228 |
+
},
|
43229 |
+
{
|
43230 |
+
"epoch": 0.24967905627381806,
|
43231 |
+
"grad_norm": 4.341570854187012,
|
43232 |
+
"learning_rate": 8.572651926942428e-05,
|
43233 |
+
"loss": 0.0956,
|
43234 |
+
"step": 6175
|
43235 |
+
},
|
43236 |
+
{
|
43237 |
+
"epoch": 0.2497194901290851,
|
43238 |
+
"grad_norm": 6.579067707061768,
|
43239 |
+
"learning_rate": 8.572205738151931e-05,
|
43240 |
+
"loss": 0.1692,
|
43241 |
+
"step": 6176
|
43242 |
+
},
|
43243 |
+
{
|
43244 |
+
"epoch": 0.2497599239843521,
|
43245 |
+
"grad_norm": 4.9769206047058105,
|
43246 |
+
"learning_rate": 8.571759491248593e-05,
|
43247 |
+
"loss": 0.1608,
|
43248 |
+
"step": 6177
|
43249 |
+
},
|
43250 |
+
{
|
43251 |
+
"epoch": 0.24980035783961912,
|
43252 |
+
"grad_norm": 11.039288520812988,
|
43253 |
+
"learning_rate": 8.57131318623967e-05,
|
43254 |
+
"loss": 0.3244,
|
43255 |
+
"step": 6178
|
43256 |
+
},
|
43257 |
+
{
|
43258 |
+
"epoch": 0.24984079169488613,
|
43259 |
+
"grad_norm": 2.834900140762329,
|
43260 |
+
"learning_rate": 8.570866823132425e-05,
|
43261 |
+
"loss": 0.1042,
|
43262 |
+
"step": 6179
|
43263 |
+
},
|
43264 |
+
{
|
43265 |
+
"epoch": 0.24988122555015313,
|
43266 |
+
"grad_norm": 5.615602016448975,
|
43267 |
+
"learning_rate": 8.57042040193412e-05,
|
43268 |
+
"loss": 0.1991,
|
43269 |
+
"step": 6180
|
43270 |
+
},
|
43271 |
+
{
|
43272 |
+
"epoch": 0.24992165940542016,
|
43273 |
+
"grad_norm": 5.0720086097717285,
|
43274 |
+
"learning_rate": 8.569973922652015e-05,
|
43275 |
+
"loss": 0.1036,
|
43276 |
+
"step": 6181
|
43277 |
+
},
|
43278 |
+
{
|
43279 |
+
"epoch": 0.24996209326068716,
|
43280 |
+
"grad_norm": 4.303607940673828,
|
43281 |
+
"learning_rate": 8.569527385293375e-05,
|
43282 |
+
"loss": 0.2971,
|
43283 |
+
"step": 6182
|
43284 |
+
},
|
43285 |
+
{
|
43286 |
+
"epoch": 0.25000252711595417,
|
43287 |
+
"grad_norm": 2.68996262550354,
|
43288 |
+
"learning_rate": 8.569080789865463e-05,
|
43289 |
+
"loss": 0.0876,
|
43290 |
+
"step": 6183
|
43291 |
+
},
|
43292 |
+
{
|
43293 |
+
"epoch": 0.2500429609712212,
|
43294 |
+
"grad_norm": 3.8559789657592773,
|
43295 |
+
"learning_rate": 8.568634136375547e-05,
|
43296 |
+
"loss": 0.1207,
|
43297 |
+
"step": 6184
|
43298 |
+
},
|
43299 |
+
{
|
43300 |
+
"epoch": 0.2500833948264882,
|
43301 |
+
"grad_norm": 6.243417739868164,
|
43302 |
+
"learning_rate": 8.568187424830889e-05,
|
43303 |
+
"loss": 0.2087,
|
43304 |
+
"step": 6185
|
43305 |
+
},
|
43306 |
+
{
|
43307 |
+
"epoch": 0.25012382868175526,
|
43308 |
+
"grad_norm": 5.405614376068115,
|
43309 |
+
"learning_rate": 8.567740655238761e-05,
|
43310 |
+
"loss": 0.1594,
|
43311 |
+
"step": 6186
|
43312 |
+
},
|
43313 |
+
{
|
43314 |
+
"epoch": 0.25016426253702223,
|
43315 |
+
"grad_norm": 3.042715311050415,
|
43316 |
+
"learning_rate": 8.567293827606427e-05,
|
43317 |
+
"loss": 0.0963,
|
43318 |
+
"step": 6187
|
43319 |
+
},
|
43320 |
+
{
|
43321 |
+
"epoch": 0.25020469639228926,
|
43322 |
+
"grad_norm": 8.87225341796875,
|
43323 |
+
"learning_rate": 8.566846941941158e-05,
|
43324 |
+
"loss": 0.243,
|
43325 |
+
"step": 6188
|
43326 |
+
},
|
43327 |
+
{
|
43328 |
+
"epoch": 0.2502451302475563,
|
43329 |
+
"grad_norm": 3.334763288497925,
|
43330 |
+
"learning_rate": 8.566399998250224e-05,
|
43331 |
+
"loss": 0.0835,
|
43332 |
+
"step": 6189
|
43333 |
+
},
|
43334 |
+
{
|
43335 |
+
"epoch": 0.2502855641028233,
|
43336 |
+
"grad_norm": 5.4543538093566895,
|
43337 |
+
"learning_rate": 8.565952996540893e-05,
|
43338 |
+
"loss": 0.1584,
|
43339 |
+
"step": 6190
|
43340 |
+
},
|
43341 |
+
{
|
43342 |
+
"epoch": 0.2503259979580903,
|
43343 |
+
"grad_norm": 6.160861968994141,
|
43344 |
+
"learning_rate": 8.565505936820441e-05,
|
43345 |
+
"loss": 0.1298,
|
43346 |
+
"step": 6191
|
43347 |
+
},
|
43348 |
+
{
|
43349 |
+
"epoch": 0.2503664318133573,
|
43350 |
+
"grad_norm": 5.069806098937988,
|
43351 |
+
"learning_rate": 8.565058819096139e-05,
|
43352 |
+
"loss": 0.2268,
|
43353 |
+
"step": 6192
|
43354 |
+
},
|
43355 |
+
{
|
43356 |
+
"epoch": 0.25040686566862436,
|
43357 |
+
"grad_norm": 5.266380786895752,
|
43358 |
+
"learning_rate": 8.564611643375263e-05,
|
43359 |
+
"loss": 0.1274,
|
43360 |
+
"step": 6193
|
43361 |
+
},
|
43362 |
+
{
|
43363 |
+
"epoch": 0.25044729952389133,
|
43364 |
+
"grad_norm": 5.647245407104492,
|
43365 |
+
"learning_rate": 8.564164409665083e-05,
|
43366 |
+
"loss": 0.1708,
|
43367 |
+
"step": 6194
|
43368 |
+
},
|
43369 |
+
{
|
43370 |
+
"epoch": 0.25048773337915836,
|
43371 |
+
"grad_norm": 8.582027435302734,
|
43372 |
+
"learning_rate": 8.563717117972876e-05,
|
43373 |
+
"loss": 0.193,
|
43374 |
+
"step": 6195
|
43375 |
+
},
|
43376 |
+
{
|
43377 |
+
"epoch": 0.2505281672344254,
|
43378 |
+
"grad_norm": 4.458442687988281,
|
43379 |
+
"learning_rate": 8.563269768305922e-05,
|
43380 |
+
"loss": 0.3056,
|
43381 |
+
"step": 6196
|
43382 |
+
},
|
43383 |
+
{
|
43384 |
+
"epoch": 0.2505686010896924,
|
43385 |
+
"grad_norm": 3.2652697563171387,
|
43386 |
+
"learning_rate": 8.562822360671495e-05,
|
43387 |
+
"loss": 0.1163,
|
43388 |
+
"step": 6197
|
43389 |
+
},
|
43390 |
+
{
|
43391 |
+
"epoch": 0.2506090349449594,
|
43392 |
+
"grad_norm": 3.184579849243164,
|
43393 |
+
"learning_rate": 8.562374895076874e-05,
|
43394 |
+
"loss": 0.1153,
|
43395 |
+
"step": 6198
|
43396 |
+
},
|
43397 |
+
{
|
43398 |
+
"epoch": 0.2506494688002264,
|
43399 |
+
"grad_norm": 4.188009262084961,
|
43400 |
+
"learning_rate": 8.561927371529341e-05,
|
43401 |
+
"loss": 0.1579,
|
43402 |
+
"step": 6199
|
43403 |
+
},
|
43404 |
+
{
|
43405 |
+
"epoch": 0.25068990265549346,
|
43406 |
+
"grad_norm": 1.9505306482315063,
|
43407 |
+
"learning_rate": 8.561479790036175e-05,
|
43408 |
+
"loss": 0.0645,
|
43409 |
+
"step": 6200
|
43410 |
+
},
|
43411 |
+
{
|
43412 |
+
"epoch": 0.25073033651076043,
|
43413 |
+
"grad_norm": 4.53508996963501,
|
43414 |
+
"learning_rate": 8.561032150604655e-05,
|
43415 |
+
"loss": 0.1473,
|
43416 |
+
"step": 6201
|
43417 |
+
},
|
43418 |
+
{
|
43419 |
+
"epoch": 0.25077077036602746,
|
43420 |
+
"grad_norm": 3.168933391571045,
|
43421 |
+
"learning_rate": 8.560584453242064e-05,
|
43422 |
+
"loss": 0.1491,
|
43423 |
+
"step": 6202
|
43424 |
+
},
|
43425 |
+
{
|
43426 |
+
"epoch": 0.2508112042212945,
|
43427 |
+
"grad_norm": 3.8186163902282715,
|
43428 |
+
"learning_rate": 8.560136697955687e-05,
|
43429 |
+
"loss": 0.1645,
|
43430 |
+
"step": 6203
|
43431 |
+
},
|
43432 |
+
{
|
43433 |
+
"epoch": 0.2508516380765615,
|
43434 |
+
"grad_norm": 4.747062683105469,
|
43435 |
+
"learning_rate": 8.559688884752808e-05,
|
43436 |
+
"loss": 0.1574,
|
43437 |
+
"step": 6204
|
43438 |
+
},
|
43439 |
+
{
|
43440 |
+
"epoch": 0.2508920719318285,
|
43441 |
+
"grad_norm": 4.399488925933838,
|
43442 |
+
"learning_rate": 8.55924101364071e-05,
|
43443 |
+
"loss": 0.2242,
|
43444 |
+
"step": 6205
|
43445 |
+
},
|
43446 |
+
{
|
43447 |
+
"epoch": 0.25093250578709553,
|
43448 |
+
"grad_norm": 6.263681411743164,
|
43449 |
+
"learning_rate": 8.55879308462668e-05,
|
43450 |
+
"loss": 0.2975,
|
43451 |
+
"step": 6206
|
43452 |
+
},
|
43453 |
+
{
|
43454 |
+
"epoch": 0.25097293964236256,
|
43455 |
+
"grad_norm": 5.284090518951416,
|
43456 |
+
"learning_rate": 8.558345097718006e-05,
|
43457 |
+
"loss": 0.1785,
|
43458 |
+
"step": 6207
|
43459 |
+
},
|
43460 |
+
{
|
43461 |
+
"epoch": 0.2510133734976296,
|
43462 |
+
"grad_norm": 5.420502185821533,
|
43463 |
+
"learning_rate": 8.557897052921975e-05,
|
43464 |
+
"loss": 0.1035,
|
43465 |
+
"step": 6208
|
43466 |
+
},
|
43467 |
+
{
|
43468 |
+
"epoch": 0.25105380735289656,
|
43469 |
+
"grad_norm": 4.130248069763184,
|
43470 |
+
"learning_rate": 8.557448950245874e-05,
|
43471 |
+
"loss": 0.2202,
|
43472 |
+
"step": 6209
|
43473 |
+
},
|
43474 |
+
{
|
43475 |
+
"epoch": 0.2510942412081636,
|
43476 |
+
"grad_norm": 3.97829008102417,
|
43477 |
+
"learning_rate": 8.557000789696997e-05,
|
43478 |
+
"loss": 0.0936,
|
43479 |
+
"step": 6210
|
43480 |
+
},
|
43481 |
+
{
|
43482 |
+
"epoch": 0.2511346750634306,
|
43483 |
+
"grad_norm": 5.584280967712402,
|
43484 |
+
"learning_rate": 8.556552571282631e-05,
|
43485 |
+
"loss": 0.2014,
|
43486 |
+
"step": 6211
|
43487 |
+
},
|
43488 |
+
{
|
43489 |
+
"epoch": 0.2511751089186976,
|
43490 |
+
"grad_norm": 4.516365051269531,
|
43491 |
+
"learning_rate": 8.55610429501007e-05,
|
43492 |
+
"loss": 0.2308,
|
43493 |
+
"step": 6212
|
43494 |
+
},
|
43495 |
+
{
|
43496 |
+
"epoch": 0.25121554277396463,
|
43497 |
+
"grad_norm": 4.201866626739502,
|
43498 |
+
"learning_rate": 8.555655960886603e-05,
|
43499 |
+
"loss": 0.1656,
|
43500 |
+
"step": 6213
|
43501 |
+
},
|
43502 |
+
{
|
43503 |
+
"epoch": 0.25125597662923166,
|
43504 |
+
"grad_norm": 5.899411201477051,
|
43505 |
+
"learning_rate": 8.555207568919529e-05,
|
43506 |
+
"loss": 0.2022,
|
43507 |
+
"step": 6214
|
43508 |
}
|
43509 |
],
|
43510 |
"logging_steps": 1,
|
|
|
43524 |
"attributes": {}
|
43525 |
}
|
43526 |
},
|
43527 |
+
"total_flos": 3.8434118781055795e+17,
|
43528 |
"train_batch_size": 4,
|
43529 |
"trial_name": null,
|
43530 |
"trial_params": null
|