Training in progress, step 6692, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140880624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac2fe8d451f0ab45d6f961da028e8911b64d6ed1f2220a2e6edd89c375e27d3e
|
3 |
size 1140880624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2281891834
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46fa9c29c6d244954c10f5d53d20240f6d9e3f125045615c022bb316e2fb8a7e
|
3 |
size 2281891834
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8715cc43bf2c9f766de377789704301ad4dc82fdc686f97b912c0c8325ab6be4
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56efa14fe932adfac746276a55300e3672684c2d6ac9f840cdaaf175e29dfbba
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -45178,6 +45178,1679 @@
|
|
45178 |
"learning_rate": 8.446399868418122e-05,
|
45179 |
"loss": 0.2542,
|
45180 |
"step": 6453
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45181 |
}
|
45182 |
],
|
45183 |
"logging_steps": 1,
|
@@ -45197,7 +46870,7 @@
|
|
45197 |
"attributes": {}
|
45198 |
}
|
45199 |
},
|
45200 |
-
"total_flos":
|
45201 |
"train_batch_size": 4,
|
45202 |
"trial_name": null,
|
45203 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.27058335944686485,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 6692,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
45178 |
"learning_rate": 8.446399868418122e-05,
|
45179 |
"loss": 0.2542,
|
45180 |
"step": 6453
|
45181 |
+
},
|
45182 |
+
{
|
45183 |
+
"epoch": 0.26096010189331525,
|
45184 |
+
"grad_norm": 4.479053020477295,
|
45185 |
+
"learning_rate": 8.44593780709199e-05,
|
45186 |
+
"loss": 0.1764,
|
45187 |
+
"step": 6454
|
45188 |
+
},
|
45189 |
+
{
|
45190 |
+
"epoch": 0.2610005357485823,
|
45191 |
+
"grad_norm": 3.381807804107666,
|
45192 |
+
"learning_rate": 8.445475689707147e-05,
|
45193 |
+
"loss": 0.1947,
|
45194 |
+
"step": 6455
|
45195 |
+
},
|
45196 |
+
{
|
45197 |
+
"epoch": 0.2610409696038493,
|
45198 |
+
"grad_norm": 3.185697555541992,
|
45199 |
+
"learning_rate": 8.445013516271116e-05,
|
45200 |
+
"loss": 0.1445,
|
45201 |
+
"step": 6456
|
45202 |
+
},
|
45203 |
+
{
|
45204 |
+
"epoch": 0.26108140345911635,
|
45205 |
+
"grad_norm": 3.825044870376587,
|
45206 |
+
"learning_rate": 8.444551286791411e-05,
|
45207 |
+
"loss": 0.1587,
|
45208 |
+
"step": 6457
|
45209 |
+
},
|
45210 |
+
{
|
45211 |
+
"epoch": 0.2611218373143833,
|
45212 |
+
"grad_norm": 2.2519567012786865,
|
45213 |
+
"learning_rate": 8.444089001275554e-05,
|
45214 |
+
"loss": 0.0917,
|
45215 |
+
"step": 6458
|
45216 |
+
},
|
45217 |
+
{
|
45218 |
+
"epoch": 0.26116227116965035,
|
45219 |
+
"grad_norm": 2.907572031021118,
|
45220 |
+
"learning_rate": 8.443626659731064e-05,
|
45221 |
+
"loss": 0.159,
|
45222 |
+
"step": 6459
|
45223 |
+
},
|
45224 |
+
{
|
45225 |
+
"epoch": 0.2612027050249174,
|
45226 |
+
"grad_norm": 3.546018123626709,
|
45227 |
+
"learning_rate": 8.443164262165461e-05,
|
45228 |
+
"loss": 0.2036,
|
45229 |
+
"step": 6460
|
45230 |
+
},
|
45231 |
+
{
|
45232 |
+
"epoch": 0.26124313888018436,
|
45233 |
+
"grad_norm": 4.949718475341797,
|
45234 |
+
"learning_rate": 8.44270180858627e-05,
|
45235 |
+
"loss": 0.1441,
|
45236 |
+
"step": 6461
|
45237 |
+
},
|
45238 |
+
{
|
45239 |
+
"epoch": 0.2612835727354514,
|
45240 |
+
"grad_norm": 4.2386674880981445,
|
45241 |
+
"learning_rate": 8.442239299001016e-05,
|
45242 |
+
"loss": 0.1225,
|
45243 |
+
"step": 6462
|
45244 |
+
},
|
45245 |
+
{
|
45246 |
+
"epoch": 0.2613240065907184,
|
45247 |
+
"grad_norm": 2.96069598197937,
|
45248 |
+
"learning_rate": 8.44177673341722e-05,
|
45249 |
+
"loss": 0.0953,
|
45250 |
+
"step": 6463
|
45251 |
+
},
|
45252 |
+
{
|
45253 |
+
"epoch": 0.26136444044598545,
|
45254 |
+
"grad_norm": 6.37571382522583,
|
45255 |
+
"learning_rate": 8.441314111842407e-05,
|
45256 |
+
"loss": 0.2887,
|
45257 |
+
"step": 6464
|
45258 |
+
},
|
45259 |
+
{
|
45260 |
+
"epoch": 0.2614048743012524,
|
45261 |
+
"grad_norm": 6.25275182723999,
|
45262 |
+
"learning_rate": 8.440851434284103e-05,
|
45263 |
+
"loss": 0.2222,
|
45264 |
+
"step": 6465
|
45265 |
+
},
|
45266 |
+
{
|
45267 |
+
"epoch": 0.26144530815651945,
|
45268 |
+
"grad_norm": 3.5580968856811523,
|
45269 |
+
"learning_rate": 8.440388700749835e-05,
|
45270 |
+
"loss": 0.1493,
|
45271 |
+
"step": 6466
|
45272 |
+
},
|
45273 |
+
{
|
45274 |
+
"epoch": 0.2614857420117865,
|
45275 |
+
"grad_norm": 8.079176902770996,
|
45276 |
+
"learning_rate": 8.439925911247132e-05,
|
45277 |
+
"loss": 0.1547,
|
45278 |
+
"step": 6467
|
45279 |
+
},
|
45280 |
+
{
|
45281 |
+
"epoch": 0.26152617586705346,
|
45282 |
+
"grad_norm": 4.430794715881348,
|
45283 |
+
"learning_rate": 8.439463065783523e-05,
|
45284 |
+
"loss": 0.077,
|
45285 |
+
"step": 6468
|
45286 |
+
},
|
45287 |
+
{
|
45288 |
+
"epoch": 0.2615666097223205,
|
45289 |
+
"grad_norm": 4.564385414123535,
|
45290 |
+
"learning_rate": 8.439000164366536e-05,
|
45291 |
+
"loss": 0.1131,
|
45292 |
+
"step": 6469
|
45293 |
+
},
|
45294 |
+
{
|
45295 |
+
"epoch": 0.2616070435775875,
|
45296 |
+
"grad_norm": 4.047739505767822,
|
45297 |
+
"learning_rate": 8.438537207003701e-05,
|
45298 |
+
"loss": 0.2903,
|
45299 |
+
"step": 6470
|
45300 |
+
},
|
45301 |
+
{
|
45302 |
+
"epoch": 0.26164747743285455,
|
45303 |
+
"grad_norm": 3.771772623062134,
|
45304 |
+
"learning_rate": 8.43807419370255e-05,
|
45305 |
+
"loss": 0.1291,
|
45306 |
+
"step": 6471
|
45307 |
+
},
|
45308 |
+
{
|
45309 |
+
"epoch": 0.2616879112881215,
|
45310 |
+
"grad_norm": 7.2533955574035645,
|
45311 |
+
"learning_rate": 8.437611124470616e-05,
|
45312 |
+
"loss": 0.2869,
|
45313 |
+
"step": 6472
|
45314 |
+
},
|
45315 |
+
{
|
45316 |
+
"epoch": 0.26172834514338855,
|
45317 |
+
"grad_norm": 7.090485572814941,
|
45318 |
+
"learning_rate": 8.437147999315436e-05,
|
45319 |
+
"loss": 0.1909,
|
45320 |
+
"step": 6473
|
45321 |
+
},
|
45322 |
+
{
|
45323 |
+
"epoch": 0.2617687789986556,
|
45324 |
+
"grad_norm": 8.594810485839844,
|
45325 |
+
"learning_rate": 8.436684818244535e-05,
|
45326 |
+
"loss": 0.2963,
|
45327 |
+
"step": 6474
|
45328 |
+
},
|
45329 |
+
{
|
45330 |
+
"epoch": 0.2618092128539226,
|
45331 |
+
"grad_norm": 10.380483627319336,
|
45332 |
+
"learning_rate": 8.436221581265456e-05,
|
45333 |
+
"loss": 0.4436,
|
45334 |
+
"step": 6475
|
45335 |
+
},
|
45336 |
+
{
|
45337 |
+
"epoch": 0.2618496467091896,
|
45338 |
+
"grad_norm": 5.023477554321289,
|
45339 |
+
"learning_rate": 8.435758288385732e-05,
|
45340 |
+
"loss": 0.1768,
|
45341 |
+
"step": 6476
|
45342 |
+
},
|
45343 |
+
{
|
45344 |
+
"epoch": 0.2618900805644566,
|
45345 |
+
"grad_norm": 3.6216633319854736,
|
45346 |
+
"learning_rate": 8.435294939612898e-05,
|
45347 |
+
"loss": 0.1173,
|
45348 |
+
"step": 6477
|
45349 |
+
},
|
45350 |
+
{
|
45351 |
+
"epoch": 0.26193051441972365,
|
45352 |
+
"grad_norm": 4.181687831878662,
|
45353 |
+
"learning_rate": 8.434831534954497e-05,
|
45354 |
+
"loss": 0.1436,
|
45355 |
+
"step": 6478
|
45356 |
+
},
|
45357 |
+
{
|
45358 |
+
"epoch": 0.2619709482749906,
|
45359 |
+
"grad_norm": 3.5942511558532715,
|
45360 |
+
"learning_rate": 8.434368074418063e-05,
|
45361 |
+
"loss": 0.1774,
|
45362 |
+
"step": 6479
|
45363 |
+
},
|
45364 |
+
{
|
45365 |
+
"epoch": 0.26201138213025765,
|
45366 |
+
"grad_norm": 4.015344142913818,
|
45367 |
+
"learning_rate": 8.433904558011136e-05,
|
45368 |
+
"loss": 0.1306,
|
45369 |
+
"step": 6480
|
45370 |
+
},
|
45371 |
+
{
|
45372 |
+
"epoch": 0.2620518159855247,
|
45373 |
+
"grad_norm": 1.9899013042449951,
|
45374 |
+
"learning_rate": 8.43344098574126e-05,
|
45375 |
+
"loss": 0.0908,
|
45376 |
+
"step": 6481
|
45377 |
+
},
|
45378 |
+
{
|
45379 |
+
"epoch": 0.2620922498407917,
|
45380 |
+
"grad_norm": 7.040347099304199,
|
45381 |
+
"learning_rate": 8.432977357615973e-05,
|
45382 |
+
"loss": 0.1741,
|
45383 |
+
"step": 6482
|
45384 |
+
},
|
45385 |
+
{
|
45386 |
+
"epoch": 0.2621326836960587,
|
45387 |
+
"grad_norm": 5.5926690101623535,
|
45388 |
+
"learning_rate": 8.432513673642817e-05,
|
45389 |
+
"loss": 0.2538,
|
45390 |
+
"step": 6483
|
45391 |
+
},
|
45392 |
+
{
|
45393 |
+
"epoch": 0.2621731175513257,
|
45394 |
+
"grad_norm": 5.606401443481445,
|
45395 |
+
"learning_rate": 8.432049933829339e-05,
|
45396 |
+
"loss": 0.179,
|
45397 |
+
"step": 6484
|
45398 |
+
},
|
45399 |
+
{
|
45400 |
+
"epoch": 0.26221355140659275,
|
45401 |
+
"grad_norm": 2.6667726039886475,
|
45402 |
+
"learning_rate": 8.43158613818308e-05,
|
45403 |
+
"loss": 0.1405,
|
45404 |
+
"step": 6485
|
45405 |
+
},
|
45406 |
+
{
|
45407 |
+
"epoch": 0.2622539852618598,
|
45408 |
+
"grad_norm": 3.666436195373535,
|
45409 |
+
"learning_rate": 8.431122286711586e-05,
|
45410 |
+
"loss": 0.1345,
|
45411 |
+
"step": 6486
|
45412 |
+
},
|
45413 |
+
{
|
45414 |
+
"epoch": 0.26229441911712675,
|
45415 |
+
"grad_norm": 1.826880693435669,
|
45416 |
+
"learning_rate": 8.430658379422401e-05,
|
45417 |
+
"loss": 0.0768,
|
45418 |
+
"step": 6487
|
45419 |
+
},
|
45420 |
+
{
|
45421 |
+
"epoch": 0.2623348529723938,
|
45422 |
+
"grad_norm": 8.996906280517578,
|
45423 |
+
"learning_rate": 8.430194416323076e-05,
|
45424 |
+
"loss": 0.3116,
|
45425 |
+
"step": 6488
|
45426 |
+
},
|
45427 |
+
{
|
45428 |
+
"epoch": 0.2623752868276608,
|
45429 |
+
"grad_norm": 7.172107696533203,
|
45430 |
+
"learning_rate": 8.429730397421155e-05,
|
45431 |
+
"loss": 0.2038,
|
45432 |
+
"step": 6489
|
45433 |
+
},
|
45434 |
+
{
|
45435 |
+
"epoch": 0.2624157206829278,
|
45436 |
+
"grad_norm": 4.744250774383545,
|
45437 |
+
"learning_rate": 8.42926632272419e-05,
|
45438 |
+
"loss": 0.2677,
|
45439 |
+
"step": 6490
|
45440 |
+
},
|
45441 |
+
{
|
45442 |
+
"epoch": 0.2624561545381948,
|
45443 |
+
"grad_norm": 6.7520012855529785,
|
45444 |
+
"learning_rate": 8.428802192239726e-05,
|
45445 |
+
"loss": 0.1968,
|
45446 |
+
"step": 6491
|
45447 |
+
},
|
45448 |
+
{
|
45449 |
+
"epoch": 0.26249658839346185,
|
45450 |
+
"grad_norm": 3.461371898651123,
|
45451 |
+
"learning_rate": 8.428338005975318e-05,
|
45452 |
+
"loss": 0.1477,
|
45453 |
+
"step": 6492
|
45454 |
+
},
|
45455 |
+
{
|
45456 |
+
"epoch": 0.2625370222487289,
|
45457 |
+
"grad_norm": 1.8159852027893066,
|
45458 |
+
"learning_rate": 8.427873763938514e-05,
|
45459 |
+
"loss": 0.067,
|
45460 |
+
"step": 6493
|
45461 |
+
},
|
45462 |
+
{
|
45463 |
+
"epoch": 0.26257745610399585,
|
45464 |
+
"grad_norm": 9.266837120056152,
|
45465 |
+
"learning_rate": 8.42740946613687e-05,
|
45466 |
+
"loss": 0.2863,
|
45467 |
+
"step": 6494
|
45468 |
+
},
|
45469 |
+
{
|
45470 |
+
"epoch": 0.2626178899592629,
|
45471 |
+
"grad_norm": 3.8387975692749023,
|
45472 |
+
"learning_rate": 8.426945112577937e-05,
|
45473 |
+
"loss": 0.1482,
|
45474 |
+
"step": 6495
|
45475 |
+
},
|
45476 |
+
{
|
45477 |
+
"epoch": 0.2626583238145299,
|
45478 |
+
"grad_norm": 2.9947237968444824,
|
45479 |
+
"learning_rate": 8.426480703269269e-05,
|
45480 |
+
"loss": 0.1598,
|
45481 |
+
"step": 6496
|
45482 |
+
},
|
45483 |
+
{
|
45484 |
+
"epoch": 0.26269875766979695,
|
45485 |
+
"grad_norm": 13.141474723815918,
|
45486 |
+
"learning_rate": 8.426016238218419e-05,
|
45487 |
+
"loss": 0.2665,
|
45488 |
+
"step": 6497
|
45489 |
+
},
|
45490 |
+
{
|
45491 |
+
"epoch": 0.2627391915250639,
|
45492 |
+
"grad_norm": 8.485552787780762,
|
45493 |
+
"learning_rate": 8.425551717432948e-05,
|
45494 |
+
"loss": 0.2041,
|
45495 |
+
"step": 6498
|
45496 |
+
},
|
45497 |
+
{
|
45498 |
+
"epoch": 0.26277962538033095,
|
45499 |
+
"grad_norm": 6.097568511962891,
|
45500 |
+
"learning_rate": 8.425087140920409e-05,
|
45501 |
+
"loss": 0.1209,
|
45502 |
+
"step": 6499
|
45503 |
+
},
|
45504 |
+
{
|
45505 |
+
"epoch": 0.262820059235598,
|
45506 |
+
"grad_norm": 6.649581432342529,
|
45507 |
+
"learning_rate": 8.424622508688363e-05,
|
45508 |
+
"loss": 0.1078,
|
45509 |
+
"step": 6500
|
45510 |
+
},
|
45511 |
+
{
|
45512 |
+
"epoch": 0.26286049309086496,
|
45513 |
+
"grad_norm": 5.81403112411499,
|
45514 |
+
"learning_rate": 8.424157820744364e-05,
|
45515 |
+
"loss": 0.2087,
|
45516 |
+
"step": 6501
|
45517 |
+
},
|
45518 |
+
{
|
45519 |
+
"epoch": 0.262900926946132,
|
45520 |
+
"grad_norm": 4.567441940307617,
|
45521 |
+
"learning_rate": 8.423693077095975e-05,
|
45522 |
+
"loss": 0.1554,
|
45523 |
+
"step": 6502
|
45524 |
+
},
|
45525 |
+
{
|
45526 |
+
"epoch": 0.262941360801399,
|
45527 |
+
"grad_norm": 6.034025192260742,
|
45528 |
+
"learning_rate": 8.423228277750755e-05,
|
45529 |
+
"loss": 0.197,
|
45530 |
+
"step": 6503
|
45531 |
+
},
|
45532 |
+
{
|
45533 |
+
"epoch": 0.26298179465666605,
|
45534 |
+
"grad_norm": 2.8581862449645996,
|
45535 |
+
"learning_rate": 8.422763422716267e-05,
|
45536 |
+
"loss": 0.1916,
|
45537 |
+
"step": 6504
|
45538 |
+
},
|
45539 |
+
{
|
45540 |
+
"epoch": 0.263022228511933,
|
45541 |
+
"grad_norm": 2.978179931640625,
|
45542 |
+
"learning_rate": 8.42229851200007e-05,
|
45543 |
+
"loss": 0.1003,
|
45544 |
+
"step": 6505
|
45545 |
+
},
|
45546 |
+
{
|
45547 |
+
"epoch": 0.26306266236720005,
|
45548 |
+
"grad_norm": 3.627148151397705,
|
45549 |
+
"learning_rate": 8.421833545609733e-05,
|
45550 |
+
"loss": 0.2098,
|
45551 |
+
"step": 6506
|
45552 |
+
},
|
45553 |
+
{
|
45554 |
+
"epoch": 0.2631030962224671,
|
45555 |
+
"grad_norm": 3.9005444049835205,
|
45556 |
+
"learning_rate": 8.421368523552815e-05,
|
45557 |
+
"loss": 0.1233,
|
45558 |
+
"step": 6507
|
45559 |
+
},
|
45560 |
+
{
|
45561 |
+
"epoch": 0.2631435300777341,
|
45562 |
+
"grad_norm": 6.3803181648254395,
|
45563 |
+
"learning_rate": 8.420903445836882e-05,
|
45564 |
+
"loss": 0.3498,
|
45565 |
+
"step": 6508
|
45566 |
+
},
|
45567 |
+
{
|
45568 |
+
"epoch": 0.2631839639330011,
|
45569 |
+
"grad_norm": 5.617633819580078,
|
45570 |
+
"learning_rate": 8.420438312469501e-05,
|
45571 |
+
"loss": 0.1782,
|
45572 |
+
"step": 6509
|
45573 |
+
},
|
45574 |
+
{
|
45575 |
+
"epoch": 0.2632243977882681,
|
45576 |
+
"grad_norm": 2.999321937561035,
|
45577 |
+
"learning_rate": 8.419973123458238e-05,
|
45578 |
+
"loss": 0.1713,
|
45579 |
+
"step": 6510
|
45580 |
+
},
|
45581 |
+
{
|
45582 |
+
"epoch": 0.26326483164353515,
|
45583 |
+
"grad_norm": 7.433416843414307,
|
45584 |
+
"learning_rate": 8.419507878810662e-05,
|
45585 |
+
"loss": 0.2645,
|
45586 |
+
"step": 6511
|
45587 |
+
},
|
45588 |
+
{
|
45589 |
+
"epoch": 0.2633052654988021,
|
45590 |
+
"grad_norm": 4.668217182159424,
|
45591 |
+
"learning_rate": 8.419042578534339e-05,
|
45592 |
+
"loss": 0.1612,
|
45593 |
+
"step": 6512
|
45594 |
+
},
|
45595 |
+
{
|
45596 |
+
"epoch": 0.26334569935406915,
|
45597 |
+
"grad_norm": 3.0045735836029053,
|
45598 |
+
"learning_rate": 8.41857722263684e-05,
|
45599 |
+
"loss": 0.2459,
|
45600 |
+
"step": 6513
|
45601 |
+
},
|
45602 |
+
{
|
45603 |
+
"epoch": 0.2633861332093362,
|
45604 |
+
"grad_norm": 1.820125937461853,
|
45605 |
+
"learning_rate": 8.418111811125737e-05,
|
45606 |
+
"loss": 0.1569,
|
45607 |
+
"step": 6514
|
45608 |
+
},
|
45609 |
+
{
|
45610 |
+
"epoch": 0.2634265670646032,
|
45611 |
+
"grad_norm": 2.8025760650634766,
|
45612 |
+
"learning_rate": 8.4176463440086e-05,
|
45613 |
+
"loss": 0.1436,
|
45614 |
+
"step": 6515
|
45615 |
+
},
|
45616 |
+
{
|
45617 |
+
"epoch": 0.2634670009198702,
|
45618 |
+
"grad_norm": 2.2238306999206543,
|
45619 |
+
"learning_rate": 8.417180821293e-05,
|
45620 |
+
"loss": 0.1373,
|
45621 |
+
"step": 6516
|
45622 |
+
},
|
45623 |
+
{
|
45624 |
+
"epoch": 0.2635074347751372,
|
45625 |
+
"grad_norm": 5.5867919921875,
|
45626 |
+
"learning_rate": 8.416715242986514e-05,
|
45627 |
+
"loss": 0.1568,
|
45628 |
+
"step": 6517
|
45629 |
+
},
|
45630 |
+
{
|
45631 |
+
"epoch": 0.26354786863040425,
|
45632 |
+
"grad_norm": 4.6446123123168945,
|
45633 |
+
"learning_rate": 8.416249609096712e-05,
|
45634 |
+
"loss": 0.1215,
|
45635 |
+
"step": 6518
|
45636 |
+
},
|
45637 |
+
{
|
45638 |
+
"epoch": 0.2635883024856713,
|
45639 |
+
"grad_norm": 1.404708981513977,
|
45640 |
+
"learning_rate": 8.415783919631169e-05,
|
45641 |
+
"loss": 0.095,
|
45642 |
+
"step": 6519
|
45643 |
+
},
|
45644 |
+
{
|
45645 |
+
"epoch": 0.26362873634093825,
|
45646 |
+
"grad_norm": 6.0271406173706055,
|
45647 |
+
"learning_rate": 8.415318174597461e-05,
|
45648 |
+
"loss": 0.1576,
|
45649 |
+
"step": 6520
|
45650 |
+
},
|
45651 |
+
{
|
45652 |
+
"epoch": 0.2636691701962053,
|
45653 |
+
"grad_norm": 6.995720386505127,
|
45654 |
+
"learning_rate": 8.41485237400317e-05,
|
45655 |
+
"loss": 0.2199,
|
45656 |
+
"step": 6521
|
45657 |
+
},
|
45658 |
+
{
|
45659 |
+
"epoch": 0.2637096040514723,
|
45660 |
+
"grad_norm": 6.817748069763184,
|
45661 |
+
"learning_rate": 8.414386517855868e-05,
|
45662 |
+
"loss": 0.2366,
|
45663 |
+
"step": 6522
|
45664 |
+
},
|
45665 |
+
{
|
45666 |
+
"epoch": 0.2637500379067393,
|
45667 |
+
"grad_norm": 4.0690765380859375,
|
45668 |
+
"learning_rate": 8.413920606163134e-05,
|
45669 |
+
"loss": 0.1487,
|
45670 |
+
"step": 6523
|
45671 |
+
},
|
45672 |
+
{
|
45673 |
+
"epoch": 0.2637904717620063,
|
45674 |
+
"grad_norm": 3.969592332839966,
|
45675 |
+
"learning_rate": 8.41345463893255e-05,
|
45676 |
+
"loss": 0.2808,
|
45677 |
+
"step": 6524
|
45678 |
+
},
|
45679 |
+
{
|
45680 |
+
"epoch": 0.26383090561727335,
|
45681 |
+
"grad_norm": 4.069786071777344,
|
45682 |
+
"learning_rate": 8.412988616171694e-05,
|
45683 |
+
"loss": 0.1688,
|
45684 |
+
"step": 6525
|
45685 |
+
},
|
45686 |
+
{
|
45687 |
+
"epoch": 0.2638713394725404,
|
45688 |
+
"grad_norm": 8.43940258026123,
|
45689 |
+
"learning_rate": 8.412522537888149e-05,
|
45690 |
+
"loss": 0.1824,
|
45691 |
+
"step": 6526
|
45692 |
+
},
|
45693 |
+
{
|
45694 |
+
"epoch": 0.26391177332780735,
|
45695 |
+
"grad_norm": 2.263195753097534,
|
45696 |
+
"learning_rate": 8.412056404089496e-05,
|
45697 |
+
"loss": 0.1638,
|
45698 |
+
"step": 6527
|
45699 |
+
},
|
45700 |
+
{
|
45701 |
+
"epoch": 0.2639522071830744,
|
45702 |
+
"grad_norm": 2.8283069133758545,
|
45703 |
+
"learning_rate": 8.41159021478332e-05,
|
45704 |
+
"loss": 0.1456,
|
45705 |
+
"step": 6528
|
45706 |
+
},
|
45707 |
+
{
|
45708 |
+
"epoch": 0.2639926410383414,
|
45709 |
+
"grad_norm": 5.247934341430664,
|
45710 |
+
"learning_rate": 8.411123969977204e-05,
|
45711 |
+
"loss": 0.157,
|
45712 |
+
"step": 6529
|
45713 |
+
},
|
45714 |
+
{
|
45715 |
+
"epoch": 0.26403307489360844,
|
45716 |
+
"grad_norm": 3.966691493988037,
|
45717 |
+
"learning_rate": 8.41065766967873e-05,
|
45718 |
+
"loss": 0.179,
|
45719 |
+
"step": 6530
|
45720 |
+
},
|
45721 |
+
{
|
45722 |
+
"epoch": 0.2640735087488754,
|
45723 |
+
"grad_norm": 5.709972381591797,
|
45724 |
+
"learning_rate": 8.410191313895489e-05,
|
45725 |
+
"loss": 0.2276,
|
45726 |
+
"step": 6531
|
45727 |
+
},
|
45728 |
+
{
|
45729 |
+
"epoch": 0.26411394260414245,
|
45730 |
+
"grad_norm": 3.630922555923462,
|
45731 |
+
"learning_rate": 8.409724902635064e-05,
|
45732 |
+
"loss": 0.1604,
|
45733 |
+
"step": 6532
|
45734 |
+
},
|
45735 |
+
{
|
45736 |
+
"epoch": 0.2641543764594095,
|
45737 |
+
"grad_norm": 1.5789424180984497,
|
45738 |
+
"learning_rate": 8.409258435905043e-05,
|
45739 |
+
"loss": 0.058,
|
45740 |
+
"step": 6533
|
45741 |
+
},
|
45742 |
+
{
|
45743 |
+
"epoch": 0.26419481031467645,
|
45744 |
+
"grad_norm": 5.126832008361816,
|
45745 |
+
"learning_rate": 8.408791913713015e-05,
|
45746 |
+
"loss": 0.2515,
|
45747 |
+
"step": 6534
|
45748 |
+
},
|
45749 |
+
{
|
45750 |
+
"epoch": 0.2642352441699435,
|
45751 |
+
"grad_norm": 5.651684761047363,
|
45752 |
+
"learning_rate": 8.40832533606657e-05,
|
45753 |
+
"loss": 0.1203,
|
45754 |
+
"step": 6535
|
45755 |
+
},
|
45756 |
+
{
|
45757 |
+
"epoch": 0.2642756780252105,
|
45758 |
+
"grad_norm": 3.721672296524048,
|
45759 |
+
"learning_rate": 8.407858702973299e-05,
|
45760 |
+
"loss": 0.1965,
|
45761 |
+
"step": 6536
|
45762 |
+
},
|
45763 |
+
{
|
45764 |
+
"epoch": 0.26431611188047754,
|
45765 |
+
"grad_norm": 2.2916038036346436,
|
45766 |
+
"learning_rate": 8.40739201444079e-05,
|
45767 |
+
"loss": 0.1112,
|
45768 |
+
"step": 6537
|
45769 |
+
},
|
45770 |
+
{
|
45771 |
+
"epoch": 0.2643565457357445,
|
45772 |
+
"grad_norm": 3.3301517963409424,
|
45773 |
+
"learning_rate": 8.406925270476638e-05,
|
45774 |
+
"loss": 0.1123,
|
45775 |
+
"step": 6538
|
45776 |
+
},
|
45777 |
+
{
|
45778 |
+
"epoch": 0.26439697959101155,
|
45779 |
+
"grad_norm": 4.490497589111328,
|
45780 |
+
"learning_rate": 8.406458471088437e-05,
|
45781 |
+
"loss": 0.11,
|
45782 |
+
"step": 6539
|
45783 |
+
},
|
45784 |
+
{
|
45785 |
+
"epoch": 0.2644374134462786,
|
45786 |
+
"grad_norm": 4.2007269859313965,
|
45787 |
+
"learning_rate": 8.405991616283778e-05,
|
45788 |
+
"loss": 0.2107,
|
45789 |
+
"step": 6540
|
45790 |
+
},
|
45791 |
+
{
|
45792 |
+
"epoch": 0.2644778473015456,
|
45793 |
+
"grad_norm": 4.226531028747559,
|
45794 |
+
"learning_rate": 8.405524706070255e-05,
|
45795 |
+
"loss": 0.1342,
|
45796 |
+
"step": 6541
|
45797 |
+
},
|
45798 |
+
{
|
45799 |
+
"epoch": 0.2645182811568126,
|
45800 |
+
"grad_norm": 4.862766265869141,
|
45801 |
+
"learning_rate": 8.405057740455468e-05,
|
45802 |
+
"loss": 0.1249,
|
45803 |
+
"step": 6542
|
45804 |
+
},
|
45805 |
+
{
|
45806 |
+
"epoch": 0.2645587150120796,
|
45807 |
+
"grad_norm": 5.847499847412109,
|
45808 |
+
"learning_rate": 8.40459071944701e-05,
|
45809 |
+
"loss": 0.1532,
|
45810 |
+
"step": 6543
|
45811 |
+
},
|
45812 |
+
{
|
45813 |
+
"epoch": 0.26459914886734665,
|
45814 |
+
"grad_norm": 4.505615711212158,
|
45815 |
+
"learning_rate": 8.40412364305248e-05,
|
45816 |
+
"loss": 0.0901,
|
45817 |
+
"step": 6544
|
45818 |
+
},
|
45819 |
+
{
|
45820 |
+
"epoch": 0.2646395827226136,
|
45821 |
+
"grad_norm": 3.2695295810699463,
|
45822 |
+
"learning_rate": 8.403656511279477e-05,
|
45823 |
+
"loss": 0.1241,
|
45824 |
+
"step": 6545
|
45825 |
+
},
|
45826 |
+
{
|
45827 |
+
"epoch": 0.26468001657788065,
|
45828 |
+
"grad_norm": 2.6395678520202637,
|
45829 |
+
"learning_rate": 8.403189324135599e-05,
|
45830 |
+
"loss": 0.2737,
|
45831 |
+
"step": 6546
|
45832 |
+
},
|
45833 |
+
{
|
45834 |
+
"epoch": 0.2647204504331477,
|
45835 |
+
"grad_norm": 5.6780219078063965,
|
45836 |
+
"learning_rate": 8.402722081628447e-05,
|
45837 |
+
"loss": 0.1713,
|
45838 |
+
"step": 6547
|
45839 |
+
},
|
45840 |
+
{
|
45841 |
+
"epoch": 0.2647608842884147,
|
45842 |
+
"grad_norm": 7.018209934234619,
|
45843 |
+
"learning_rate": 8.402254783765622e-05,
|
45844 |
+
"loss": 0.2443,
|
45845 |
+
"step": 6548
|
45846 |
+
},
|
45847 |
+
{
|
45848 |
+
"epoch": 0.2648013181436817,
|
45849 |
+
"grad_norm": 5.417405128479004,
|
45850 |
+
"learning_rate": 8.401787430554726e-05,
|
45851 |
+
"loss": 0.1917,
|
45852 |
+
"step": 6549
|
45853 |
+
},
|
45854 |
+
{
|
45855 |
+
"epoch": 0.2648417519989487,
|
45856 |
+
"grad_norm": 6.50978946685791,
|
45857 |
+
"learning_rate": 8.401320022003362e-05,
|
45858 |
+
"loss": 0.2162,
|
45859 |
+
"step": 6550
|
45860 |
+
},
|
45861 |
+
{
|
45862 |
+
"epoch": 0.26488218585421575,
|
45863 |
+
"grad_norm": 6.596667766571045,
|
45864 |
+
"learning_rate": 8.400852558119133e-05,
|
45865 |
+
"loss": 0.2816,
|
45866 |
+
"step": 6551
|
45867 |
+
},
|
45868 |
+
{
|
45869 |
+
"epoch": 0.2649226197094828,
|
45870 |
+
"grad_norm": 4.556078910827637,
|
45871 |
+
"learning_rate": 8.400385038909646e-05,
|
45872 |
+
"loss": 0.1147,
|
45873 |
+
"step": 6552
|
45874 |
+
},
|
45875 |
+
{
|
45876 |
+
"epoch": 0.26496305356474975,
|
45877 |
+
"grad_norm": 5.520486831665039,
|
45878 |
+
"learning_rate": 8.399917464382503e-05,
|
45879 |
+
"loss": 0.1135,
|
45880 |
+
"step": 6553
|
45881 |
+
},
|
45882 |
+
{
|
45883 |
+
"epoch": 0.2650034874200168,
|
45884 |
+
"grad_norm": 3.928405284881592,
|
45885 |
+
"learning_rate": 8.399449834545313e-05,
|
45886 |
+
"loss": 0.2043,
|
45887 |
+
"step": 6554
|
45888 |
+
},
|
45889 |
+
{
|
45890 |
+
"epoch": 0.2650439212752838,
|
45891 |
+
"grad_norm": 3.1504178047180176,
|
45892 |
+
"learning_rate": 8.398982149405685e-05,
|
45893 |
+
"loss": 0.105,
|
45894 |
+
"step": 6555
|
45895 |
+
},
|
45896 |
+
{
|
45897 |
+
"epoch": 0.2650843551305508,
|
45898 |
+
"grad_norm": 2.864487409591675,
|
45899 |
+
"learning_rate": 8.398514408971222e-05,
|
45900 |
+
"loss": 0.1212,
|
45901 |
+
"step": 6556
|
45902 |
+
},
|
45903 |
+
{
|
45904 |
+
"epoch": 0.2651247889858178,
|
45905 |
+
"grad_norm": 5.958441734313965,
|
45906 |
+
"learning_rate": 8.39804661324954e-05,
|
45907 |
+
"loss": 0.1383,
|
45908 |
+
"step": 6557
|
45909 |
+
},
|
45910 |
+
{
|
45911 |
+
"epoch": 0.26516522284108485,
|
45912 |
+
"grad_norm": 4.321859836578369,
|
45913 |
+
"learning_rate": 8.397578762248243e-05,
|
45914 |
+
"loss": 0.1522,
|
45915 |
+
"step": 6558
|
45916 |
+
},
|
45917 |
+
{
|
45918 |
+
"epoch": 0.2652056566963519,
|
45919 |
+
"grad_norm": 1.879861831665039,
|
45920 |
+
"learning_rate": 8.397110855974947e-05,
|
45921 |
+
"loss": 0.1188,
|
45922 |
+
"step": 6559
|
45923 |
+
},
|
45924 |
+
{
|
45925 |
+
"epoch": 0.26524609055161885,
|
45926 |
+
"grad_norm": 3.717170476913452,
|
45927 |
+
"learning_rate": 8.39664289443726e-05,
|
45928 |
+
"loss": 0.2723,
|
45929 |
+
"step": 6560
|
45930 |
+
},
|
45931 |
+
{
|
45932 |
+
"epoch": 0.2652865244068859,
|
45933 |
+
"grad_norm": 4.7497639656066895,
|
45934 |
+
"learning_rate": 8.396174877642798e-05,
|
45935 |
+
"loss": 0.2185,
|
45936 |
+
"step": 6561
|
45937 |
+
},
|
45938 |
+
{
|
45939 |
+
"epoch": 0.2653269582621529,
|
45940 |
+
"grad_norm": 10.374393463134766,
|
45941 |
+
"learning_rate": 8.395706805599173e-05,
|
45942 |
+
"loss": 0.2596,
|
45943 |
+
"step": 6562
|
45944 |
+
},
|
45945 |
+
{
|
45946 |
+
"epoch": 0.26536739211741994,
|
45947 |
+
"grad_norm": 4.920714855194092,
|
45948 |
+
"learning_rate": 8.395238678313999e-05,
|
45949 |
+
"loss": 0.1864,
|
45950 |
+
"step": 6563
|
45951 |
+
},
|
45952 |
+
{
|
45953 |
+
"epoch": 0.2654078259726869,
|
45954 |
+
"grad_norm": 2.298659086227417,
|
45955 |
+
"learning_rate": 8.394770495794891e-05,
|
45956 |
+
"loss": 0.2215,
|
45957 |
+
"step": 6564
|
45958 |
+
},
|
45959 |
+
{
|
45960 |
+
"epoch": 0.26544825982795395,
|
45961 |
+
"grad_norm": 2.148195505142212,
|
45962 |
+
"learning_rate": 8.394302258049469e-05,
|
45963 |
+
"loss": 0.0792,
|
45964 |
+
"step": 6565
|
45965 |
+
},
|
45966 |
+
{
|
45967 |
+
"epoch": 0.265488693683221,
|
45968 |
+
"grad_norm": 2.573965311050415,
|
45969 |
+
"learning_rate": 8.393833965085347e-05,
|
45970 |
+
"loss": 0.2546,
|
45971 |
+
"step": 6566
|
45972 |
+
},
|
45973 |
+
{
|
45974 |
+
"epoch": 0.26552912753848795,
|
45975 |
+
"grad_norm": 2.622670888900757,
|
45976 |
+
"learning_rate": 8.393365616910145e-05,
|
45977 |
+
"loss": 0.1245,
|
45978 |
+
"step": 6567
|
45979 |
+
},
|
45980 |
+
{
|
45981 |
+
"epoch": 0.265569561393755,
|
45982 |
+
"grad_norm": 2.1678662300109863,
|
45983 |
+
"learning_rate": 8.39289721353148e-05,
|
45984 |
+
"loss": 0.1488,
|
45985 |
+
"step": 6568
|
45986 |
+
},
|
45987 |
+
{
|
45988 |
+
"epoch": 0.265609995249022,
|
45989 |
+
"grad_norm": 2.495394468307495,
|
45990 |
+
"learning_rate": 8.392428754956976e-05,
|
45991 |
+
"loss": 0.1709,
|
45992 |
+
"step": 6569
|
45993 |
+
},
|
45994 |
+
{
|
45995 |
+
"epoch": 0.26565042910428904,
|
45996 |
+
"grad_norm": 3.6703124046325684,
|
45997 |
+
"learning_rate": 8.39196024119425e-05,
|
45998 |
+
"loss": 0.1122,
|
45999 |
+
"step": 6570
|
46000 |
+
},
|
46001 |
+
{
|
46002 |
+
"epoch": 0.265690862959556,
|
46003 |
+
"grad_norm": 4.449604034423828,
|
46004 |
+
"learning_rate": 8.391491672250925e-05,
|
46005 |
+
"loss": 0.2264,
|
46006 |
+
"step": 6571
|
46007 |
+
},
|
46008 |
+
{
|
46009 |
+
"epoch": 0.26573129681482305,
|
46010 |
+
"grad_norm": 7.208311557769775,
|
46011 |
+
"learning_rate": 8.391023048134624e-05,
|
46012 |
+
"loss": 0.2274,
|
46013 |
+
"step": 6572
|
46014 |
+
},
|
46015 |
+
{
|
46016 |
+
"epoch": 0.2657717306700901,
|
46017 |
+
"grad_norm": 3.3646368980407715,
|
46018 |
+
"learning_rate": 8.39055436885297e-05,
|
46019 |
+
"loss": 0.1179,
|
46020 |
+
"step": 6573
|
46021 |
+
},
|
46022 |
+
{
|
46023 |
+
"epoch": 0.2658121645253571,
|
46024 |
+
"grad_norm": 3.414924383163452,
|
46025 |
+
"learning_rate": 8.390085634413588e-05,
|
46026 |
+
"loss": 0.1123,
|
46027 |
+
"step": 6574
|
46028 |
+
},
|
46029 |
+
{
|
46030 |
+
"epoch": 0.2658525983806241,
|
46031 |
+
"grad_norm": 4.110054016113281,
|
46032 |
+
"learning_rate": 8.389616844824103e-05,
|
46033 |
+
"loss": 0.1368,
|
46034 |
+
"step": 6575
|
46035 |
+
},
|
46036 |
+
{
|
46037 |
+
"epoch": 0.2658930322358911,
|
46038 |
+
"grad_norm": 3.7811076641082764,
|
46039 |
+
"learning_rate": 8.389148000092143e-05,
|
46040 |
+
"loss": 0.11,
|
46041 |
+
"step": 6576
|
46042 |
+
},
|
46043 |
+
{
|
46044 |
+
"epoch": 0.26593346609115814,
|
46045 |
+
"grad_norm": 3.9020886421203613,
|
46046 |
+
"learning_rate": 8.388679100225334e-05,
|
46047 |
+
"loss": 0.1784,
|
46048 |
+
"step": 6577
|
46049 |
+
},
|
46050 |
+
{
|
46051 |
+
"epoch": 0.2659738999464251,
|
46052 |
+
"grad_norm": 4.886922836303711,
|
46053 |
+
"learning_rate": 8.388210145231302e-05,
|
46054 |
+
"loss": 0.1216,
|
46055 |
+
"step": 6578
|
46056 |
+
},
|
46057 |
+
{
|
46058 |
+
"epoch": 0.26601433380169215,
|
46059 |
+
"grad_norm": 6.976899147033691,
|
46060 |
+
"learning_rate": 8.387741135117679e-05,
|
46061 |
+
"loss": 0.1608,
|
46062 |
+
"step": 6579
|
46063 |
+
},
|
46064 |
+
{
|
46065 |
+
"epoch": 0.2660547676569592,
|
46066 |
+
"grad_norm": 2.9632537364959717,
|
46067 |
+
"learning_rate": 8.387272069892094e-05,
|
46068 |
+
"loss": 0.1304,
|
46069 |
+
"step": 6580
|
46070 |
+
},
|
46071 |
+
{
|
46072 |
+
"epoch": 0.2660952015122262,
|
46073 |
+
"grad_norm": 5.112422943115234,
|
46074 |
+
"learning_rate": 8.386802949562177e-05,
|
46075 |
+
"loss": 0.2,
|
46076 |
+
"step": 6581
|
46077 |
+
},
|
46078 |
+
{
|
46079 |
+
"epoch": 0.2661356353674932,
|
46080 |
+
"grad_norm": 2.054858684539795,
|
46081 |
+
"learning_rate": 8.386333774135561e-05,
|
46082 |
+
"loss": 0.0589,
|
46083 |
+
"step": 6582
|
46084 |
+
},
|
46085 |
+
{
|
46086 |
+
"epoch": 0.2661760692227602,
|
46087 |
+
"grad_norm": 5.725131511688232,
|
46088 |
+
"learning_rate": 8.385864543619877e-05,
|
46089 |
+
"loss": 0.1647,
|
46090 |
+
"step": 6583
|
46091 |
+
},
|
46092 |
+
{
|
46093 |
+
"epoch": 0.26621650307802724,
|
46094 |
+
"grad_norm": 4.986160755157471,
|
46095 |
+
"learning_rate": 8.385395258022759e-05,
|
46096 |
+
"loss": 0.3018,
|
46097 |
+
"step": 6584
|
46098 |
+
},
|
46099 |
+
{
|
46100 |
+
"epoch": 0.2662569369332943,
|
46101 |
+
"grad_norm": 3.6309750080108643,
|
46102 |
+
"learning_rate": 8.384925917351842e-05,
|
46103 |
+
"loss": 0.13,
|
46104 |
+
"step": 6585
|
46105 |
+
},
|
46106 |
+
{
|
46107 |
+
"epoch": 0.26629737078856125,
|
46108 |
+
"grad_norm": 2.1312034130096436,
|
46109 |
+
"learning_rate": 8.384456521614761e-05,
|
46110 |
+
"loss": 0.1472,
|
46111 |
+
"step": 6586
|
46112 |
+
},
|
46113 |
+
{
|
46114 |
+
"epoch": 0.2663378046438283,
|
46115 |
+
"grad_norm": 8.192390441894531,
|
46116 |
+
"learning_rate": 8.383987070819154e-05,
|
46117 |
+
"loss": 0.1566,
|
46118 |
+
"step": 6587
|
46119 |
+
},
|
46120 |
+
{
|
46121 |
+
"epoch": 0.2663782384990953,
|
46122 |
+
"grad_norm": 3.709893226623535,
|
46123 |
+
"learning_rate": 8.383517564972653e-05,
|
46124 |
+
"loss": 0.2068,
|
46125 |
+
"step": 6588
|
46126 |
+
},
|
46127 |
+
{
|
46128 |
+
"epoch": 0.2664186723543623,
|
46129 |
+
"grad_norm": 6.357100486755371,
|
46130 |
+
"learning_rate": 8.383048004082902e-05,
|
46131 |
+
"loss": 0.1536,
|
46132 |
+
"step": 6589
|
46133 |
+
},
|
46134 |
+
{
|
46135 |
+
"epoch": 0.2664591062096293,
|
46136 |
+
"grad_norm": 9.251782417297363,
|
46137 |
+
"learning_rate": 8.382578388157536e-05,
|
46138 |
+
"loss": 0.259,
|
46139 |
+
"step": 6590
|
46140 |
+
},
|
46141 |
+
{
|
46142 |
+
"epoch": 0.26649954006489635,
|
46143 |
+
"grad_norm": 6.364524841308594,
|
46144 |
+
"learning_rate": 8.382108717204195e-05,
|
46145 |
+
"loss": 0.1661,
|
46146 |
+
"step": 6591
|
46147 |
+
},
|
46148 |
+
{
|
46149 |
+
"epoch": 0.2665399739201634,
|
46150 |
+
"grad_norm": 4.140508651733398,
|
46151 |
+
"learning_rate": 8.38163899123052e-05,
|
46152 |
+
"loss": 0.3332,
|
46153 |
+
"step": 6592
|
46154 |
+
},
|
46155 |
+
{
|
46156 |
+
"epoch": 0.26658040777543035,
|
46157 |
+
"grad_norm": 5.130396366119385,
|
46158 |
+
"learning_rate": 8.381169210244153e-05,
|
46159 |
+
"loss": 0.1297,
|
46160 |
+
"step": 6593
|
46161 |
+
},
|
46162 |
+
{
|
46163 |
+
"epoch": 0.2666208416306974,
|
46164 |
+
"grad_norm": 4.069079399108887,
|
46165 |
+
"learning_rate": 8.380699374252737e-05,
|
46166 |
+
"loss": 0.2063,
|
46167 |
+
"step": 6594
|
46168 |
+
},
|
46169 |
+
{
|
46170 |
+
"epoch": 0.2666612754859644,
|
46171 |
+
"grad_norm": 3.3463737964630127,
|
46172 |
+
"learning_rate": 8.380229483263913e-05,
|
46173 |
+
"loss": 0.1234,
|
46174 |
+
"step": 6595
|
46175 |
+
},
|
46176 |
+
{
|
46177 |
+
"epoch": 0.2667017093412314,
|
46178 |
+
"grad_norm": 3.121365785598755,
|
46179 |
+
"learning_rate": 8.379759537285328e-05,
|
46180 |
+
"loss": 0.3259,
|
46181 |
+
"step": 6596
|
46182 |
+
},
|
46183 |
+
{
|
46184 |
+
"epoch": 0.2667421431964984,
|
46185 |
+
"grad_norm": 3.143514394760132,
|
46186 |
+
"learning_rate": 8.379289536324626e-05,
|
46187 |
+
"loss": 0.1241,
|
46188 |
+
"step": 6597
|
46189 |
+
},
|
46190 |
+
{
|
46191 |
+
"epoch": 0.26678257705176545,
|
46192 |
+
"grad_norm": 1.6280158758163452,
|
46193 |
+
"learning_rate": 8.378819480389454e-05,
|
46194 |
+
"loss": 0.0781,
|
46195 |
+
"step": 6598
|
46196 |
+
},
|
46197 |
+
{
|
46198 |
+
"epoch": 0.2668230109070325,
|
46199 |
+
"grad_norm": 3.641113758087158,
|
46200 |
+
"learning_rate": 8.378349369487455e-05,
|
46201 |
+
"loss": 0.0979,
|
46202 |
+
"step": 6599
|
46203 |
+
},
|
46204 |
+
{
|
46205 |
+
"epoch": 0.26686344476229945,
|
46206 |
+
"grad_norm": 1.5974067449569702,
|
46207 |
+
"learning_rate": 8.377879203626283e-05,
|
46208 |
+
"loss": 0.0629,
|
46209 |
+
"step": 6600
|
46210 |
+
},
|
46211 |
+
{
|
46212 |
+
"epoch": 0.2669038786175665,
|
46213 |
+
"grad_norm": 2.053572654724121,
|
46214 |
+
"learning_rate": 8.377408982813579e-05,
|
46215 |
+
"loss": 0.0955,
|
46216 |
+
"step": 6601
|
46217 |
+
},
|
46218 |
+
{
|
46219 |
+
"epoch": 0.2669443124728335,
|
46220 |
+
"grad_norm": 3.7002804279327393,
|
46221 |
+
"learning_rate": 8.376938707056999e-05,
|
46222 |
+
"loss": 0.2416,
|
46223 |
+
"step": 6602
|
46224 |
+
},
|
46225 |
+
{
|
46226 |
+
"epoch": 0.26698474632810054,
|
46227 |
+
"grad_norm": 8.693068504333496,
|
46228 |
+
"learning_rate": 8.376468376364193e-05,
|
46229 |
+
"loss": 0.164,
|
46230 |
+
"step": 6603
|
46231 |
+
},
|
46232 |
+
{
|
46233 |
+
"epoch": 0.2670251801833675,
|
46234 |
+
"grad_norm": 3.3940460681915283,
|
46235 |
+
"learning_rate": 8.375997990742808e-05,
|
46236 |
+
"loss": 0.1395,
|
46237 |
+
"step": 6604
|
46238 |
+
},
|
46239 |
+
{
|
46240 |
+
"epoch": 0.26706561403863455,
|
46241 |
+
"grad_norm": 4.1176581382751465,
|
46242 |
+
"learning_rate": 8.3755275502005e-05,
|
46243 |
+
"loss": 0.1798,
|
46244 |
+
"step": 6605
|
46245 |
+
},
|
46246 |
+
{
|
46247 |
+
"epoch": 0.2671060478939016,
|
46248 |
+
"grad_norm": 3.1726737022399902,
|
46249 |
+
"learning_rate": 8.375057054744921e-05,
|
46250 |
+
"loss": 0.0952,
|
46251 |
+
"step": 6606
|
46252 |
+
},
|
46253 |
+
{
|
46254 |
+
"epoch": 0.26714648174916855,
|
46255 |
+
"grad_norm": 4.6025190353393555,
|
46256 |
+
"learning_rate": 8.374586504383724e-05,
|
46257 |
+
"loss": 0.1456,
|
46258 |
+
"step": 6607
|
46259 |
+
},
|
46260 |
+
{
|
46261 |
+
"epoch": 0.2671869156044356,
|
46262 |
+
"grad_norm": 4.858215808868408,
|
46263 |
+
"learning_rate": 8.374115899124568e-05,
|
46264 |
+
"loss": 0.1347,
|
46265 |
+
"step": 6608
|
46266 |
+
},
|
46267 |
+
{
|
46268 |
+
"epoch": 0.2672273494597026,
|
46269 |
+
"grad_norm": 6.650022029876709,
|
46270 |
+
"learning_rate": 8.373645238975104e-05,
|
46271 |
+
"loss": 0.1621,
|
46272 |
+
"step": 6609
|
46273 |
+
},
|
46274 |
+
{
|
46275 |
+
"epoch": 0.26726778331496964,
|
46276 |
+
"grad_norm": 4.232663154602051,
|
46277 |
+
"learning_rate": 8.37317452394299e-05,
|
46278 |
+
"loss": 0.1919,
|
46279 |
+
"step": 6610
|
46280 |
+
},
|
46281 |
+
{
|
46282 |
+
"epoch": 0.2673082171702366,
|
46283 |
+
"grad_norm": 6.808403968811035,
|
46284 |
+
"learning_rate": 8.372703754035885e-05,
|
46285 |
+
"loss": 0.1747,
|
46286 |
+
"step": 6611
|
46287 |
+
},
|
46288 |
+
{
|
46289 |
+
"epoch": 0.26734865102550365,
|
46290 |
+
"grad_norm": 9.129340171813965,
|
46291 |
+
"learning_rate": 8.372232929261448e-05,
|
46292 |
+
"loss": 0.1383,
|
46293 |
+
"step": 6612
|
46294 |
+
},
|
46295 |
+
{
|
46296 |
+
"epoch": 0.2673890848807707,
|
46297 |
+
"grad_norm": 1.4686884880065918,
|
46298 |
+
"learning_rate": 8.371762049627334e-05,
|
46299 |
+
"loss": 0.0541,
|
46300 |
+
"step": 6613
|
46301 |
+
},
|
46302 |
+
{
|
46303 |
+
"epoch": 0.2674295187360377,
|
46304 |
+
"grad_norm": 5.3172078132629395,
|
46305 |
+
"learning_rate": 8.371291115141209e-05,
|
46306 |
+
"loss": 0.1711,
|
46307 |
+
"step": 6614
|
46308 |
+
},
|
46309 |
+
{
|
46310 |
+
"epoch": 0.2674699525913047,
|
46311 |
+
"grad_norm": 1.6350047588348389,
|
46312 |
+
"learning_rate": 8.370820125810733e-05,
|
46313 |
+
"loss": 0.0823,
|
46314 |
+
"step": 6615
|
46315 |
+
},
|
46316 |
+
{
|
46317 |
+
"epoch": 0.2675103864465717,
|
46318 |
+
"grad_norm": 4.700094699859619,
|
46319 |
+
"learning_rate": 8.370349081643563e-05,
|
46320 |
+
"loss": 0.1098,
|
46321 |
+
"step": 6616
|
46322 |
+
},
|
46323 |
+
{
|
46324 |
+
"epoch": 0.26755082030183874,
|
46325 |
+
"grad_norm": 6.265729904174805,
|
46326 |
+
"learning_rate": 8.369877982647367e-05,
|
46327 |
+
"loss": 0.2921,
|
46328 |
+
"step": 6617
|
46329 |
+
},
|
46330 |
+
{
|
46331 |
+
"epoch": 0.2675912541571057,
|
46332 |
+
"grad_norm": 4.136383056640625,
|
46333 |
+
"learning_rate": 8.369406828829809e-05,
|
46334 |
+
"loss": 0.206,
|
46335 |
+
"step": 6618
|
46336 |
+
},
|
46337 |
+
{
|
46338 |
+
"epoch": 0.26763168801237275,
|
46339 |
+
"grad_norm": 4.891862392425537,
|
46340 |
+
"learning_rate": 8.36893562019855e-05,
|
46341 |
+
"loss": 0.2128,
|
46342 |
+
"step": 6619
|
46343 |
+
},
|
46344 |
+
{
|
46345 |
+
"epoch": 0.2676721218676398,
|
46346 |
+
"grad_norm": 6.812446117401123,
|
46347 |
+
"learning_rate": 8.36846435676126e-05,
|
46348 |
+
"loss": 0.2589,
|
46349 |
+
"step": 6620
|
46350 |
+
},
|
46351 |
+
{
|
46352 |
+
"epoch": 0.2677125557229068,
|
46353 |
+
"grad_norm": 3.122558832168579,
|
46354 |
+
"learning_rate": 8.367993038525603e-05,
|
46355 |
+
"loss": 0.1824,
|
46356 |
+
"step": 6621
|
46357 |
+
},
|
46358 |
+
{
|
46359 |
+
"epoch": 0.2677529895781738,
|
46360 |
+
"grad_norm": 4.192493438720703,
|
46361 |
+
"learning_rate": 8.367521665499248e-05,
|
46362 |
+
"loss": 0.1088,
|
46363 |
+
"step": 6622
|
46364 |
+
},
|
46365 |
+
{
|
46366 |
+
"epoch": 0.2677934234334408,
|
46367 |
+
"grad_norm": 5.159574031829834,
|
46368 |
+
"learning_rate": 8.36705023768986e-05,
|
46369 |
+
"loss": 0.2182,
|
46370 |
+
"step": 6623
|
46371 |
+
},
|
46372 |
+
{
|
46373 |
+
"epoch": 0.26783385728870784,
|
46374 |
+
"grad_norm": 5.709356784820557,
|
46375 |
+
"learning_rate": 8.366578755105111e-05,
|
46376 |
+
"loss": 0.1795,
|
46377 |
+
"step": 6624
|
46378 |
+
},
|
46379 |
+
{
|
46380 |
+
"epoch": 0.2678742911439749,
|
46381 |
+
"grad_norm": 5.803701877593994,
|
46382 |
+
"learning_rate": 8.366107217752672e-05,
|
46383 |
+
"loss": 0.2336,
|
46384 |
+
"step": 6625
|
46385 |
+
},
|
46386 |
+
{
|
46387 |
+
"epoch": 0.26791472499924185,
|
46388 |
+
"grad_norm": 2.842479705810547,
|
46389 |
+
"learning_rate": 8.365635625640212e-05,
|
46390 |
+
"loss": 0.1535,
|
46391 |
+
"step": 6626
|
46392 |
+
},
|
46393 |
+
{
|
46394 |
+
"epoch": 0.2679551588545089,
|
46395 |
+
"grad_norm": 8.77539348602295,
|
46396 |
+
"learning_rate": 8.365163978775403e-05,
|
46397 |
+
"loss": 0.3746,
|
46398 |
+
"step": 6627
|
46399 |
+
},
|
46400 |
+
{
|
46401 |
+
"epoch": 0.2679955927097759,
|
46402 |
+
"grad_norm": 6.282550811767578,
|
46403 |
+
"learning_rate": 8.364692277165918e-05,
|
46404 |
+
"loss": 0.1306,
|
46405 |
+
"step": 6628
|
46406 |
+
},
|
46407 |
+
{
|
46408 |
+
"epoch": 0.2680360265650429,
|
46409 |
+
"grad_norm": 6.026726722717285,
|
46410 |
+
"learning_rate": 8.36422052081943e-05,
|
46411 |
+
"loss": 0.1601,
|
46412 |
+
"step": 6629
|
46413 |
+
},
|
46414 |
+
{
|
46415 |
+
"epoch": 0.2680764604203099,
|
46416 |
+
"grad_norm": 2.6439599990844727,
|
46417 |
+
"learning_rate": 8.363748709743617e-05,
|
46418 |
+
"loss": 0.2097,
|
46419 |
+
"step": 6630
|
46420 |
+
},
|
46421 |
+
{
|
46422 |
+
"epoch": 0.26811689427557694,
|
46423 |
+
"grad_norm": 3.718888759613037,
|
46424 |
+
"learning_rate": 8.36327684394615e-05,
|
46425 |
+
"loss": 0.1553,
|
46426 |
+
"step": 6631
|
46427 |
+
},
|
46428 |
+
{
|
46429 |
+
"epoch": 0.268157328130844,
|
46430 |
+
"grad_norm": 5.948970317840576,
|
46431 |
+
"learning_rate": 8.362804923434709e-05,
|
46432 |
+
"loss": 0.1228,
|
46433 |
+
"step": 6632
|
46434 |
+
},
|
46435 |
+
{
|
46436 |
+
"epoch": 0.26819776198611095,
|
46437 |
+
"grad_norm": 2.3430135250091553,
|
46438 |
+
"learning_rate": 8.362332948216968e-05,
|
46439 |
+
"loss": 0.2042,
|
46440 |
+
"step": 6633
|
46441 |
+
},
|
46442 |
+
{
|
46443 |
+
"epoch": 0.268238195841378,
|
46444 |
+
"grad_norm": 2.8977715969085693,
|
46445 |
+
"learning_rate": 8.361860918300605e-05,
|
46446 |
+
"loss": 0.0938,
|
46447 |
+
"step": 6634
|
46448 |
+
},
|
46449 |
+
{
|
46450 |
+
"epoch": 0.268278629696645,
|
46451 |
+
"grad_norm": 6.015623569488525,
|
46452 |
+
"learning_rate": 8.361388833693304e-05,
|
46453 |
+
"loss": 0.2318,
|
46454 |
+
"step": 6635
|
46455 |
+
},
|
46456 |
+
{
|
46457 |
+
"epoch": 0.26831906355191204,
|
46458 |
+
"grad_norm": 3.8209362030029297,
|
46459 |
+
"learning_rate": 8.36091669440274e-05,
|
46460 |
+
"loss": 0.1284,
|
46461 |
+
"step": 6636
|
46462 |
+
},
|
46463 |
+
{
|
46464 |
+
"epoch": 0.268359497407179,
|
46465 |
+
"grad_norm": 5.317723274230957,
|
46466 |
+
"learning_rate": 8.360444500436597e-05,
|
46467 |
+
"loss": 0.167,
|
46468 |
+
"step": 6637
|
46469 |
+
},
|
46470 |
+
{
|
46471 |
+
"epoch": 0.26839993126244605,
|
46472 |
+
"grad_norm": 4.560892105102539,
|
46473 |
+
"learning_rate": 8.359972251802552e-05,
|
46474 |
+
"loss": 0.1182,
|
46475 |
+
"step": 6638
|
46476 |
+
},
|
46477 |
+
{
|
46478 |
+
"epoch": 0.2684403651177131,
|
46479 |
+
"grad_norm": 1.9216376543045044,
|
46480 |
+
"learning_rate": 8.359499948508291e-05,
|
46481 |
+
"loss": 0.1043,
|
46482 |
+
"step": 6639
|
46483 |
+
},
|
46484 |
+
{
|
46485 |
+
"epoch": 0.26848079897298005,
|
46486 |
+
"grad_norm": 3.5545313358306885,
|
46487 |
+
"learning_rate": 8.3590275905615e-05,
|
46488 |
+
"loss": 0.1981,
|
46489 |
+
"step": 6640
|
46490 |
+
},
|
46491 |
+
{
|
46492 |
+
"epoch": 0.2685212328282471,
|
46493 |
+
"grad_norm": 4.940046787261963,
|
46494 |
+
"learning_rate": 8.358555177969857e-05,
|
46495 |
+
"loss": 0.1473,
|
46496 |
+
"step": 6641
|
46497 |
+
},
|
46498 |
+
{
|
46499 |
+
"epoch": 0.2685616666835141,
|
46500 |
+
"grad_norm": 3.4543397426605225,
|
46501 |
+
"learning_rate": 8.358082710741053e-05,
|
46502 |
+
"loss": 0.1641,
|
46503 |
+
"step": 6642
|
46504 |
+
},
|
46505 |
+
{
|
46506 |
+
"epoch": 0.26860210053878114,
|
46507 |
+
"grad_norm": 5.4971418380737305,
|
46508 |
+
"learning_rate": 8.35761018888277e-05,
|
46509 |
+
"loss": 0.2024,
|
46510 |
+
"step": 6643
|
46511 |
+
},
|
46512 |
+
{
|
46513 |
+
"epoch": 0.2686425343940481,
|
46514 |
+
"grad_norm": 17.138687133789062,
|
46515 |
+
"learning_rate": 8.357137612402697e-05,
|
46516 |
+
"loss": 0.5021,
|
46517 |
+
"step": 6644
|
46518 |
+
},
|
46519 |
+
{
|
46520 |
+
"epoch": 0.26868296824931515,
|
46521 |
+
"grad_norm": 3.5056703090667725,
|
46522 |
+
"learning_rate": 8.356664981308522e-05,
|
46523 |
+
"loss": 0.1467,
|
46524 |
+
"step": 6645
|
46525 |
+
},
|
46526 |
+
{
|
46527 |
+
"epoch": 0.2687234021045822,
|
46528 |
+
"grad_norm": 6.707460880279541,
|
46529 |
+
"learning_rate": 8.356192295607934e-05,
|
46530 |
+
"loss": 0.2155,
|
46531 |
+
"step": 6646
|
46532 |
+
},
|
46533 |
+
{
|
46534 |
+
"epoch": 0.2687638359598492,
|
46535 |
+
"grad_norm": 5.155137062072754,
|
46536 |
+
"learning_rate": 8.355719555308622e-05,
|
46537 |
+
"loss": 0.2303,
|
46538 |
+
"step": 6647
|
46539 |
+
},
|
46540 |
+
{
|
46541 |
+
"epoch": 0.2688042698151162,
|
46542 |
+
"grad_norm": 2.9357731342315674,
|
46543 |
+
"learning_rate": 8.355246760418277e-05,
|
46544 |
+
"loss": 0.0866,
|
46545 |
+
"step": 6648
|
46546 |
+
},
|
46547 |
+
{
|
46548 |
+
"epoch": 0.2688447036703832,
|
46549 |
+
"grad_norm": 7.29972505569458,
|
46550 |
+
"learning_rate": 8.354773910944588e-05,
|
46551 |
+
"loss": 0.2833,
|
46552 |
+
"step": 6649
|
46553 |
+
},
|
46554 |
+
{
|
46555 |
+
"epoch": 0.26888513752565024,
|
46556 |
+
"grad_norm": 4.35799503326416,
|
46557 |
+
"learning_rate": 8.35430100689525e-05,
|
46558 |
+
"loss": 0.1768,
|
46559 |
+
"step": 6650
|
46560 |
+
},
|
46561 |
+
{
|
46562 |
+
"epoch": 0.2689255713809172,
|
46563 |
+
"grad_norm": 7.227541446685791,
|
46564 |
+
"learning_rate": 8.353828048277957e-05,
|
46565 |
+
"loss": 0.2469,
|
46566 |
+
"step": 6651
|
46567 |
+
},
|
46568 |
+
{
|
46569 |
+
"epoch": 0.26896600523618425,
|
46570 |
+
"grad_norm": 5.29514741897583,
|
46571 |
+
"learning_rate": 8.353355035100402e-05,
|
46572 |
+
"loss": 0.3047,
|
46573 |
+
"step": 6652
|
46574 |
+
},
|
46575 |
+
{
|
46576 |
+
"epoch": 0.2690064390914513,
|
46577 |
+
"grad_norm": 5.829686164855957,
|
46578 |
+
"learning_rate": 8.352881967370279e-05,
|
46579 |
+
"loss": 0.1513,
|
46580 |
+
"step": 6653
|
46581 |
+
},
|
46582 |
+
{
|
46583 |
+
"epoch": 0.2690468729467183,
|
46584 |
+
"grad_norm": 4.818840026855469,
|
46585 |
+
"learning_rate": 8.352408845095283e-05,
|
46586 |
+
"loss": 0.2807,
|
46587 |
+
"step": 6654
|
46588 |
+
},
|
46589 |
+
{
|
46590 |
+
"epoch": 0.2690873068019853,
|
46591 |
+
"grad_norm": 5.2545013427734375,
|
46592 |
+
"learning_rate": 8.351935668283115e-05,
|
46593 |
+
"loss": 0.2154,
|
46594 |
+
"step": 6655
|
46595 |
+
},
|
46596 |
+
{
|
46597 |
+
"epoch": 0.2691277406572523,
|
46598 |
+
"grad_norm": 2.9671027660369873,
|
46599 |
+
"learning_rate": 8.351462436941469e-05,
|
46600 |
+
"loss": 0.158,
|
46601 |
+
"step": 6656
|
46602 |
+
},
|
46603 |
+
{
|
46604 |
+
"epoch": 0.26916817451251934,
|
46605 |
+
"grad_norm": 5.487869739532471,
|
46606 |
+
"learning_rate": 8.350989151078046e-05,
|
46607 |
+
"loss": 0.0881,
|
46608 |
+
"step": 6657
|
46609 |
+
},
|
46610 |
+
{
|
46611 |
+
"epoch": 0.2692086083677864,
|
46612 |
+
"grad_norm": 4.169815540313721,
|
46613 |
+
"learning_rate": 8.350515810700541e-05,
|
46614 |
+
"loss": 0.1348,
|
46615 |
+
"step": 6658
|
46616 |
+
},
|
46617 |
+
{
|
46618 |
+
"epoch": 0.26924904222305335,
|
46619 |
+
"grad_norm": 2.261898994445801,
|
46620 |
+
"learning_rate": 8.350042415816661e-05,
|
46621 |
+
"loss": 0.1066,
|
46622 |
+
"step": 6659
|
46623 |
+
},
|
46624 |
+
{
|
46625 |
+
"epoch": 0.2692894760783204,
|
46626 |
+
"grad_norm": 2.4516499042510986,
|
46627 |
+
"learning_rate": 8.349568966434102e-05,
|
46628 |
+
"loss": 0.1044,
|
46629 |
+
"step": 6660
|
46630 |
+
},
|
46631 |
+
{
|
46632 |
+
"epoch": 0.2693299099335874,
|
46633 |
+
"grad_norm": 2.239933490753174,
|
46634 |
+
"learning_rate": 8.349095462560567e-05,
|
46635 |
+
"loss": 0.1095,
|
46636 |
+
"step": 6661
|
46637 |
+
},
|
46638 |
+
{
|
46639 |
+
"epoch": 0.2693703437888544,
|
46640 |
+
"grad_norm": 4.995827674865723,
|
46641 |
+
"learning_rate": 8.348621904203762e-05,
|
46642 |
+
"loss": 0.2587,
|
46643 |
+
"step": 6662
|
46644 |
+
},
|
46645 |
+
{
|
46646 |
+
"epoch": 0.2694107776441214,
|
46647 |
+
"grad_norm": 3.9526302814483643,
|
46648 |
+
"learning_rate": 8.348148291371387e-05,
|
46649 |
+
"loss": 0.1517,
|
46650 |
+
"step": 6663
|
46651 |
+
},
|
46652 |
+
{
|
46653 |
+
"epoch": 0.26945121149938844,
|
46654 |
+
"grad_norm": 4.4188761711120605,
|
46655 |
+
"learning_rate": 8.347674624071149e-05,
|
46656 |
+
"loss": 0.2291,
|
46657 |
+
"step": 6664
|
46658 |
+
},
|
46659 |
+
{
|
46660 |
+
"epoch": 0.2694916453546555,
|
46661 |
+
"grad_norm": 4.185996055603027,
|
46662 |
+
"learning_rate": 8.347200902310753e-05,
|
46663 |
+
"loss": 0.24,
|
46664 |
+
"step": 6665
|
46665 |
+
},
|
46666 |
+
{
|
46667 |
+
"epoch": 0.26953207920992245,
|
46668 |
+
"grad_norm": 4.599777698516846,
|
46669 |
+
"learning_rate": 8.346727126097904e-05,
|
46670 |
+
"loss": 0.1806,
|
46671 |
+
"step": 6666
|
46672 |
+
},
|
46673 |
+
{
|
46674 |
+
"epoch": 0.2695725130651895,
|
46675 |
+
"grad_norm": 6.624025821685791,
|
46676 |
+
"learning_rate": 8.346253295440312e-05,
|
46677 |
+
"loss": 0.2144,
|
46678 |
+
"step": 6667
|
46679 |
+
},
|
46680 |
+
{
|
46681 |
+
"epoch": 0.2696129469204565,
|
46682 |
+
"grad_norm": 3.7423858642578125,
|
46683 |
+
"learning_rate": 8.345779410345687e-05,
|
46684 |
+
"loss": 0.2625,
|
46685 |
+
"step": 6668
|
46686 |
+
},
|
46687 |
+
{
|
46688 |
+
"epoch": 0.26965338077572354,
|
46689 |
+
"grad_norm": 2.7171175479888916,
|
46690 |
+
"learning_rate": 8.345305470821732e-05,
|
46691 |
+
"loss": 0.1175,
|
46692 |
+
"step": 6669
|
46693 |
+
},
|
46694 |
+
{
|
46695 |
+
"epoch": 0.2696938146309905,
|
46696 |
+
"grad_norm": 5.327414035797119,
|
46697 |
+
"learning_rate": 8.344831476876161e-05,
|
46698 |
+
"loss": 0.1192,
|
46699 |
+
"step": 6670
|
46700 |
+
},
|
46701 |
+
{
|
46702 |
+
"epoch": 0.26973424848625754,
|
46703 |
+
"grad_norm": 3.4716527462005615,
|
46704 |
+
"learning_rate": 8.344357428516685e-05,
|
46705 |
+
"loss": 0.1002,
|
46706 |
+
"step": 6671
|
46707 |
+
},
|
46708 |
+
{
|
46709 |
+
"epoch": 0.2697746823415246,
|
46710 |
+
"grad_norm": 3.382917642593384,
|
46711 |
+
"learning_rate": 8.343883325751016e-05,
|
46712 |
+
"loss": 0.2696,
|
46713 |
+
"step": 6672
|
46714 |
+
},
|
46715 |
+
{
|
46716 |
+
"epoch": 0.26981511619679155,
|
46717 |
+
"grad_norm": 5.441958427429199,
|
46718 |
+
"learning_rate": 8.343409168586867e-05,
|
46719 |
+
"loss": 0.1752,
|
46720 |
+
"step": 6673
|
46721 |
+
},
|
46722 |
+
{
|
46723 |
+
"epoch": 0.2698555500520586,
|
46724 |
+
"grad_norm": 3.3768417835235596,
|
46725 |
+
"learning_rate": 8.34293495703195e-05,
|
46726 |
+
"loss": 0.092,
|
46727 |
+
"step": 6674
|
46728 |
+
},
|
46729 |
+
{
|
46730 |
+
"epoch": 0.2698959839073256,
|
46731 |
+
"grad_norm": 5.813772201538086,
|
46732 |
+
"learning_rate": 8.342460691093978e-05,
|
46733 |
+
"loss": 0.2326,
|
46734 |
+
"step": 6675
|
46735 |
+
},
|
46736 |
+
{
|
46737 |
+
"epoch": 0.26993641776259264,
|
46738 |
+
"grad_norm": 3.0050082206726074,
|
46739 |
+
"learning_rate": 8.34198637078067e-05,
|
46740 |
+
"loss": 0.089,
|
46741 |
+
"step": 6676
|
46742 |
+
},
|
46743 |
+
{
|
46744 |
+
"epoch": 0.2699768516178596,
|
46745 |
+
"grad_norm": 4.313435077667236,
|
46746 |
+
"learning_rate": 8.341511996099741e-05,
|
46747 |
+
"loss": 0.2665,
|
46748 |
+
"step": 6677
|
46749 |
+
},
|
46750 |
+
{
|
46751 |
+
"epoch": 0.27001728547312664,
|
46752 |
+
"grad_norm": 2.853736162185669,
|
46753 |
+
"learning_rate": 8.34103756705891e-05,
|
46754 |
+
"loss": 0.066,
|
46755 |
+
"step": 6678
|
46756 |
+
},
|
46757 |
+
{
|
46758 |
+
"epoch": 0.2700577193283937,
|
46759 |
+
"grad_norm": 5.7998552322387695,
|
46760 |
+
"learning_rate": 8.34056308366589e-05,
|
46761 |
+
"loss": 0.1508,
|
46762 |
+
"step": 6679
|
46763 |
+
},
|
46764 |
+
{
|
46765 |
+
"epoch": 0.2700981531836607,
|
46766 |
+
"grad_norm": 2.8775806427001953,
|
46767 |
+
"learning_rate": 8.340088545928404e-05,
|
46768 |
+
"loss": 0.1989,
|
46769 |
+
"step": 6680
|
46770 |
+
},
|
46771 |
+
{
|
46772 |
+
"epoch": 0.2701385870389277,
|
46773 |
+
"grad_norm": 2.0381646156311035,
|
46774 |
+
"learning_rate": 8.33961395385417e-05,
|
46775 |
+
"loss": 0.0942,
|
46776 |
+
"step": 6681
|
46777 |
+
},
|
46778 |
+
{
|
46779 |
+
"epoch": 0.2701790208941947,
|
46780 |
+
"grad_norm": 3.8013174533843994,
|
46781 |
+
"learning_rate": 8.339139307450911e-05,
|
46782 |
+
"loss": 0.155,
|
46783 |
+
"step": 6682
|
46784 |
+
},
|
46785 |
+
{
|
46786 |
+
"epoch": 0.27021945474946174,
|
46787 |
+
"grad_norm": 2.5843374729156494,
|
46788 |
+
"learning_rate": 8.338664606726347e-05,
|
46789 |
+
"loss": 0.1153,
|
46790 |
+
"step": 6683
|
46791 |
+
},
|
46792 |
+
{
|
46793 |
+
"epoch": 0.2702598886047287,
|
46794 |
+
"grad_norm": 4.373371124267578,
|
46795 |
+
"learning_rate": 8.338189851688201e-05,
|
46796 |
+
"loss": 0.2366,
|
46797 |
+
"step": 6684
|
46798 |
+
},
|
46799 |
+
{
|
46800 |
+
"epoch": 0.27030032245999575,
|
46801 |
+
"grad_norm": 2.953427791595459,
|
46802 |
+
"learning_rate": 8.337715042344194e-05,
|
46803 |
+
"loss": 0.2328,
|
46804 |
+
"step": 6685
|
46805 |
+
},
|
46806 |
+
{
|
46807 |
+
"epoch": 0.2703407563152628,
|
46808 |
+
"grad_norm": 4.133306980133057,
|
46809 |
+
"learning_rate": 8.337240178702052e-05,
|
46810 |
+
"loss": 0.1676,
|
46811 |
+
"step": 6686
|
46812 |
+
},
|
46813 |
+
{
|
46814 |
+
"epoch": 0.2703811901705298,
|
46815 |
+
"grad_norm": 5.411648273468018,
|
46816 |
+
"learning_rate": 8.3367652607695e-05,
|
46817 |
+
"loss": 0.1771,
|
46818 |
+
"step": 6687
|
46819 |
+
},
|
46820 |
+
{
|
46821 |
+
"epoch": 0.2704216240257968,
|
46822 |
+
"grad_norm": 4.436055660247803,
|
46823 |
+
"learning_rate": 8.336290288554268e-05,
|
46824 |
+
"loss": 0.1585,
|
46825 |
+
"step": 6688
|
46826 |
+
},
|
46827 |
+
{
|
46828 |
+
"epoch": 0.2704620578810638,
|
46829 |
+
"grad_norm": 4.031379222869873,
|
46830 |
+
"learning_rate": 8.335815262064075e-05,
|
46831 |
+
"loss": 0.2252,
|
46832 |
+
"step": 6689
|
46833 |
+
},
|
46834 |
+
{
|
46835 |
+
"epoch": 0.27050249173633084,
|
46836 |
+
"grad_norm": 6.782537460327148,
|
46837 |
+
"learning_rate": 8.335340181306654e-05,
|
46838 |
+
"loss": 0.1625,
|
46839 |
+
"step": 6690
|
46840 |
+
},
|
46841 |
+
{
|
46842 |
+
"epoch": 0.27054292559159787,
|
46843 |
+
"grad_norm": 3.9009714126586914,
|
46844 |
+
"learning_rate": 8.334865046289733e-05,
|
46845 |
+
"loss": 0.1869,
|
46846 |
+
"step": 6691
|
46847 |
+
},
|
46848 |
+
{
|
46849 |
+
"epoch": 0.27058335944686485,
|
46850 |
+
"grad_norm": 3.912127733230591,
|
46851 |
+
"learning_rate": 8.33438985702104e-05,
|
46852 |
+
"loss": 0.1963,
|
46853 |
+
"step": 6692
|
46854 |
}
|
46855 |
],
|
46856 |
"logging_steps": 1,
|
|
|
46870 |
"attributes": {}
|
46871 |
}
|
46872 |
},
|
46873 |
+
"total_flos": 4.1390589456521626e+17,
|
46874 |
"train_batch_size": 4,
|
46875 |
"trial_name": null,
|
46876 |
"trial_params": null
|