Training in progress, step 6931, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1140880624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c68bc13e9238d4fdb9db2f0c782f6223b5e85e20f488a0c8fa9cb2f418466c87
|
3 |
size 1140880624
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2281891834
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcbbd53364d43bfb485637e11047d5b4b89930d16792d5c29034620dc707bd3b
|
3 |
size 2281891834
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1722d21317f79792031569f541248401751adc36a70fe5d2046e270a48c32a44
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b4cc2fae949e9befd95d52d477dbb2e6f74837264d63805409ab08170c2f0f5
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -46851,6 +46851,1679 @@
|
|
46851 |
"learning_rate": 8.33438985702104e-05,
|
46852 |
"loss": 0.1963,
|
46853 |
"step": 6692
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46854 |
}
|
46855 |
],
|
46856 |
"logging_steps": 1,
|
@@ -46870,7 +48543,7 @@
|
|
46870 |
"attributes": {}
|
46871 |
}
|
46872 |
},
|
46873 |
-
"total_flos": 4.
|
46874 |
"train_batch_size": 4,
|
46875 |
"trial_name": null,
|
46876 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.28024705085568147,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 6931,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
46851 |
"learning_rate": 8.33438985702104e-05,
|
46852 |
"loss": 0.1963,
|
46853 |
"step": 6692
|
46854 |
+
},
|
46855 |
+
{
|
46856 |
+
"epoch": 0.2706237933021319,
|
46857 |
+
"grad_norm": 4.984101295471191,
|
46858 |
+
"learning_rate": 8.333914613508306e-05,
|
46859 |
+
"loss": 0.2424,
|
46860 |
+
"step": 6693
|
46861 |
+
},
|
46862 |
+
{
|
46863 |
+
"epoch": 0.2706642271573989,
|
46864 |
+
"grad_norm": 4.030520439147949,
|
46865 |
+
"learning_rate": 8.333439315759264e-05,
|
46866 |
+
"loss": 0.073,
|
46867 |
+
"step": 6694
|
46868 |
+
},
|
46869 |
+
{
|
46870 |
+
"epoch": 0.2707046610126659,
|
46871 |
+
"grad_norm": 3.0774435997009277,
|
46872 |
+
"learning_rate": 8.332963963781646e-05,
|
46873 |
+
"loss": 0.2013,
|
46874 |
+
"step": 6695
|
46875 |
+
},
|
46876 |
+
{
|
46877 |
+
"epoch": 0.2707450948679329,
|
46878 |
+
"grad_norm": 3.8038461208343506,
|
46879 |
+
"learning_rate": 8.332488557583181e-05,
|
46880 |
+
"loss": 0.107,
|
46881 |
+
"step": 6696
|
46882 |
+
},
|
46883 |
+
{
|
46884 |
+
"epoch": 0.27078552872319994,
|
46885 |
+
"grad_norm": 2.3463916778564453,
|
46886 |
+
"learning_rate": 8.332013097171607e-05,
|
46887 |
+
"loss": 0.066,
|
46888 |
+
"step": 6697
|
46889 |
+
},
|
46890 |
+
{
|
46891 |
+
"epoch": 0.27082596257846697,
|
46892 |
+
"grad_norm": 6.2922844886779785,
|
46893 |
+
"learning_rate": 8.331537582554658e-05,
|
46894 |
+
"loss": 0.1706,
|
46895 |
+
"step": 6698
|
46896 |
+
},
|
46897 |
+
{
|
46898 |
+
"epoch": 0.27086639643373395,
|
46899 |
+
"grad_norm": 2.470485210418701,
|
46900 |
+
"learning_rate": 8.33106201374007e-05,
|
46901 |
+
"loss": 0.0889,
|
46902 |
+
"step": 6699
|
46903 |
+
},
|
46904 |
+
{
|
46905 |
+
"epoch": 0.270906830289001,
|
46906 |
+
"grad_norm": 5.717158794403076,
|
46907 |
+
"learning_rate": 8.330586390735577e-05,
|
46908 |
+
"loss": 0.231,
|
46909 |
+
"step": 6700
|
46910 |
+
},
|
46911 |
+
{
|
46912 |
+
"epoch": 0.270947264144268,
|
46913 |
+
"grad_norm": 3.0989787578582764,
|
46914 |
+
"learning_rate": 8.33011071354892e-05,
|
46915 |
+
"loss": 0.1874,
|
46916 |
+
"step": 6701
|
46917 |
+
},
|
46918 |
+
{
|
46919 |
+
"epoch": 0.27098769799953504,
|
46920 |
+
"grad_norm": 2.269683361053467,
|
46921 |
+
"learning_rate": 8.329634982187834e-05,
|
46922 |
+
"loss": 0.1511,
|
46923 |
+
"step": 6702
|
46924 |
+
},
|
46925 |
+
{
|
46926 |
+
"epoch": 0.271028131854802,
|
46927 |
+
"grad_norm": 2.1404988765716553,
|
46928 |
+
"learning_rate": 8.329159196660062e-05,
|
46929 |
+
"loss": 0.1141,
|
46930 |
+
"step": 6703
|
46931 |
+
},
|
46932 |
+
{
|
46933 |
+
"epoch": 0.27106856571006904,
|
46934 |
+
"grad_norm": 2.9804563522338867,
|
46935 |
+
"learning_rate": 8.328683356973341e-05,
|
46936 |
+
"loss": 0.0836,
|
46937 |
+
"step": 6704
|
46938 |
+
},
|
46939 |
+
{
|
46940 |
+
"epoch": 0.2711089995653361,
|
46941 |
+
"grad_norm": 3.4215526580810547,
|
46942 |
+
"learning_rate": 8.328207463135414e-05,
|
46943 |
+
"loss": 0.2453,
|
46944 |
+
"step": 6705
|
46945 |
+
},
|
46946 |
+
{
|
46947 |
+
"epoch": 0.27114943342060305,
|
46948 |
+
"grad_norm": 2.8280129432678223,
|
46949 |
+
"learning_rate": 8.327731515154021e-05,
|
46950 |
+
"loss": 0.2035,
|
46951 |
+
"step": 6706
|
46952 |
+
},
|
46953 |
+
{
|
46954 |
+
"epoch": 0.2711898672758701,
|
46955 |
+
"grad_norm": 2.2147393226623535,
|
46956 |
+
"learning_rate": 8.327255513036904e-05,
|
46957 |
+
"loss": 0.1283,
|
46958 |
+
"step": 6707
|
46959 |
+
},
|
46960 |
+
{
|
46961 |
+
"epoch": 0.2712303011311371,
|
46962 |
+
"grad_norm": 5.129738807678223,
|
46963 |
+
"learning_rate": 8.326779456791811e-05,
|
46964 |
+
"loss": 0.1318,
|
46965 |
+
"step": 6708
|
46966 |
+
},
|
46967 |
+
{
|
46968 |
+
"epoch": 0.27127073498640414,
|
46969 |
+
"grad_norm": 5.385104179382324,
|
46970 |
+
"learning_rate": 8.326303346426483e-05,
|
46971 |
+
"loss": 0.2047,
|
46972 |
+
"step": 6709
|
46973 |
+
},
|
46974 |
+
{
|
46975 |
+
"epoch": 0.2713111688416711,
|
46976 |
+
"grad_norm": 2.130554437637329,
|
46977 |
+
"learning_rate": 8.325827181948666e-05,
|
46978 |
+
"loss": 0.0432,
|
46979 |
+
"step": 6710
|
46980 |
+
},
|
46981 |
+
{
|
46982 |
+
"epoch": 0.27135160269693814,
|
46983 |
+
"grad_norm": 3.767094373703003,
|
46984 |
+
"learning_rate": 8.325350963366105e-05,
|
46985 |
+
"loss": 0.1456,
|
46986 |
+
"step": 6711
|
46987 |
+
},
|
46988 |
+
{
|
46989 |
+
"epoch": 0.2713920365522052,
|
46990 |
+
"grad_norm": 4.412478923797607,
|
46991 |
+
"learning_rate": 8.32487469068655e-05,
|
46992 |
+
"loss": 0.118,
|
46993 |
+
"step": 6712
|
46994 |
+
},
|
46995 |
+
{
|
46996 |
+
"epoch": 0.2714324704074722,
|
46997 |
+
"grad_norm": 5.66835355758667,
|
46998 |
+
"learning_rate": 8.324398363917749e-05,
|
46999 |
+
"loss": 0.2295,
|
47000 |
+
"step": 6713
|
47001 |
+
},
|
47002 |
+
{
|
47003 |
+
"epoch": 0.2714729042627392,
|
47004 |
+
"grad_norm": 12.864553451538086,
|
47005 |
+
"learning_rate": 8.323921983067449e-05,
|
47006 |
+
"loss": 0.2788,
|
47007 |
+
"step": 6714
|
47008 |
+
},
|
47009 |
+
{
|
47010 |
+
"epoch": 0.2715133381180062,
|
47011 |
+
"grad_norm": 8.19010066986084,
|
47012 |
+
"learning_rate": 8.323445548143398e-05,
|
47013 |
+
"loss": 0.2069,
|
47014 |
+
"step": 6715
|
47015 |
+
},
|
47016 |
+
{
|
47017 |
+
"epoch": 0.27155377197327324,
|
47018 |
+
"grad_norm": 6.033942699432373,
|
47019 |
+
"learning_rate": 8.32296905915335e-05,
|
47020 |
+
"loss": 0.2121,
|
47021 |
+
"step": 6716
|
47022 |
+
},
|
47023 |
+
{
|
47024 |
+
"epoch": 0.2715942058285402,
|
47025 |
+
"grad_norm": 5.334500789642334,
|
47026 |
+
"learning_rate": 8.322492516105057e-05,
|
47027 |
+
"loss": 0.2267,
|
47028 |
+
"step": 6717
|
47029 |
+
},
|
47030 |
+
{
|
47031 |
+
"epoch": 0.27163463968380724,
|
47032 |
+
"grad_norm": 3.398758888244629,
|
47033 |
+
"learning_rate": 8.32201591900627e-05,
|
47034 |
+
"loss": 0.1119,
|
47035 |
+
"step": 6718
|
47036 |
+
},
|
47037 |
+
{
|
47038 |
+
"epoch": 0.2716750735390743,
|
47039 |
+
"grad_norm": 4.973703384399414,
|
47040 |
+
"learning_rate": 8.321539267864739e-05,
|
47041 |
+
"loss": 0.1895,
|
47042 |
+
"step": 6719
|
47043 |
+
},
|
47044 |
+
{
|
47045 |
+
"epoch": 0.2717155073943413,
|
47046 |
+
"grad_norm": 3.5402252674102783,
|
47047 |
+
"learning_rate": 8.321062562688225e-05,
|
47048 |
+
"loss": 0.1418,
|
47049 |
+
"step": 6720
|
47050 |
+
},
|
47051 |
+
{
|
47052 |
+
"epoch": 0.2717559412496083,
|
47053 |
+
"grad_norm": 5.844006538391113,
|
47054 |
+
"learning_rate": 8.320585803484478e-05,
|
47055 |
+
"loss": 0.3136,
|
47056 |
+
"step": 6721
|
47057 |
+
},
|
47058 |
+
{
|
47059 |
+
"epoch": 0.2717963751048753,
|
47060 |
+
"grad_norm": 5.243696689605713,
|
47061 |
+
"learning_rate": 8.320108990261257e-05,
|
47062 |
+
"loss": 0.1199,
|
47063 |
+
"step": 6722
|
47064 |
+
},
|
47065 |
+
{
|
47066 |
+
"epoch": 0.27183680896014234,
|
47067 |
+
"grad_norm": 2.885085105895996,
|
47068 |
+
"learning_rate": 8.319632123026315e-05,
|
47069 |
+
"loss": 0.0434,
|
47070 |
+
"step": 6723
|
47071 |
+
},
|
47072 |
+
{
|
47073 |
+
"epoch": 0.2718772428154093,
|
47074 |
+
"grad_norm": 5.479928970336914,
|
47075 |
+
"learning_rate": 8.319155201787414e-05,
|
47076 |
+
"loss": 0.3036,
|
47077 |
+
"step": 6724
|
47078 |
+
},
|
47079 |
+
{
|
47080 |
+
"epoch": 0.27191767667067634,
|
47081 |
+
"grad_norm": 4.742898941040039,
|
47082 |
+
"learning_rate": 8.31867822655231e-05,
|
47083 |
+
"loss": 0.2227,
|
47084 |
+
"step": 6725
|
47085 |
+
},
|
47086 |
+
{
|
47087 |
+
"epoch": 0.2719581105259434,
|
47088 |
+
"grad_norm": 4.850002288818359,
|
47089 |
+
"learning_rate": 8.318201197328763e-05,
|
47090 |
+
"loss": 0.2028,
|
47091 |
+
"step": 6726
|
47092 |
+
},
|
47093 |
+
{
|
47094 |
+
"epoch": 0.2719985443812104,
|
47095 |
+
"grad_norm": 1.1411263942718506,
|
47096 |
+
"learning_rate": 8.317724114124532e-05,
|
47097 |
+
"loss": 0.0395,
|
47098 |
+
"step": 6727
|
47099 |
+
},
|
47100 |
+
{
|
47101 |
+
"epoch": 0.2720389782364774,
|
47102 |
+
"grad_norm": 6.2171149253845215,
|
47103 |
+
"learning_rate": 8.317246976947382e-05,
|
47104 |
+
"loss": 0.229,
|
47105 |
+
"step": 6728
|
47106 |
+
},
|
47107 |
+
{
|
47108 |
+
"epoch": 0.2720794120917444,
|
47109 |
+
"grad_norm": 4.50931978225708,
|
47110 |
+
"learning_rate": 8.316769785805072e-05,
|
47111 |
+
"loss": 0.3059,
|
47112 |
+
"step": 6729
|
47113 |
+
},
|
47114 |
+
{
|
47115 |
+
"epoch": 0.27211984594701144,
|
47116 |
+
"grad_norm": 5.534306526184082,
|
47117 |
+
"learning_rate": 8.316292540705364e-05,
|
47118 |
+
"loss": 0.2355,
|
47119 |
+
"step": 6730
|
47120 |
+
},
|
47121 |
+
{
|
47122 |
+
"epoch": 0.27216027980227847,
|
47123 |
+
"grad_norm": 3.494084358215332,
|
47124 |
+
"learning_rate": 8.315815241656025e-05,
|
47125 |
+
"loss": 0.1486,
|
47126 |
+
"step": 6731
|
47127 |
+
},
|
47128 |
+
{
|
47129 |
+
"epoch": 0.27220071365754545,
|
47130 |
+
"grad_norm": 3.203897476196289,
|
47131 |
+
"learning_rate": 8.315337888664818e-05,
|
47132 |
+
"loss": 0.1919,
|
47133 |
+
"step": 6732
|
47134 |
+
},
|
47135 |
+
{
|
47136 |
+
"epoch": 0.2722411475128125,
|
47137 |
+
"grad_norm": 4.390408515930176,
|
47138 |
+
"learning_rate": 8.31486048173951e-05,
|
47139 |
+
"loss": 0.1323,
|
47140 |
+
"step": 6733
|
47141 |
+
},
|
47142 |
+
{
|
47143 |
+
"epoch": 0.2722815813680795,
|
47144 |
+
"grad_norm": 2.8268377780914307,
|
47145 |
+
"learning_rate": 8.314383020887865e-05,
|
47146 |
+
"loss": 0.2003,
|
47147 |
+
"step": 6734
|
47148 |
+
},
|
47149 |
+
{
|
47150 |
+
"epoch": 0.2723220152233465,
|
47151 |
+
"grad_norm": 7.170968532562256,
|
47152 |
+
"learning_rate": 8.313905506117652e-05,
|
47153 |
+
"loss": 0.2049,
|
47154 |
+
"step": 6735
|
47155 |
+
},
|
47156 |
+
{
|
47157 |
+
"epoch": 0.2723624490786135,
|
47158 |
+
"grad_norm": 6.943522930145264,
|
47159 |
+
"learning_rate": 8.313427937436638e-05,
|
47160 |
+
"loss": 0.1712,
|
47161 |
+
"step": 6736
|
47162 |
+
},
|
47163 |
+
{
|
47164 |
+
"epoch": 0.27240288293388054,
|
47165 |
+
"grad_norm": 3.642824411392212,
|
47166 |
+
"learning_rate": 8.312950314852595e-05,
|
47167 |
+
"loss": 0.115,
|
47168 |
+
"step": 6737
|
47169 |
+
},
|
47170 |
+
{
|
47171 |
+
"epoch": 0.27244331678914757,
|
47172 |
+
"grad_norm": 4.236616611480713,
|
47173 |
+
"learning_rate": 8.312472638373289e-05,
|
47174 |
+
"loss": 0.1493,
|
47175 |
+
"step": 6738
|
47176 |
+
},
|
47177 |
+
{
|
47178 |
+
"epoch": 0.27248375064441455,
|
47179 |
+
"grad_norm": 4.64017391204834,
|
47180 |
+
"learning_rate": 8.311994908006495e-05,
|
47181 |
+
"loss": 0.164,
|
47182 |
+
"step": 6739
|
47183 |
+
},
|
47184 |
+
{
|
47185 |
+
"epoch": 0.2725241844996816,
|
47186 |
+
"grad_norm": 3.158374547958374,
|
47187 |
+
"learning_rate": 8.31151712375998e-05,
|
47188 |
+
"loss": 0.078,
|
47189 |
+
"step": 6740
|
47190 |
+
},
|
47191 |
+
{
|
47192 |
+
"epoch": 0.2725646183549486,
|
47193 |
+
"grad_norm": 3.3717398643493652,
|
47194 |
+
"learning_rate": 8.311039285641521e-05,
|
47195 |
+
"loss": 0.2323,
|
47196 |
+
"step": 6741
|
47197 |
+
},
|
47198 |
+
{
|
47199 |
+
"epoch": 0.27260505221021564,
|
47200 |
+
"grad_norm": 2.4161839485168457,
|
47201 |
+
"learning_rate": 8.310561393658892e-05,
|
47202 |
+
"loss": 0.0811,
|
47203 |
+
"step": 6742
|
47204 |
+
},
|
47205 |
+
{
|
47206 |
+
"epoch": 0.2726454860654826,
|
47207 |
+
"grad_norm": 7.858556747436523,
|
47208 |
+
"learning_rate": 8.31008344781986e-05,
|
47209 |
+
"loss": 0.2503,
|
47210 |
+
"step": 6743
|
47211 |
+
},
|
47212 |
+
{
|
47213 |
+
"epoch": 0.27268591992074964,
|
47214 |
+
"grad_norm": 6.197620391845703,
|
47215 |
+
"learning_rate": 8.309605448132209e-05,
|
47216 |
+
"loss": 0.1578,
|
47217 |
+
"step": 6744
|
47218 |
+
},
|
47219 |
+
{
|
47220 |
+
"epoch": 0.2727263537760167,
|
47221 |
+
"grad_norm": 4.341484069824219,
|
47222 |
+
"learning_rate": 8.30912739460371e-05,
|
47223 |
+
"loss": 0.0827,
|
47224 |
+
"step": 6745
|
47225 |
+
},
|
47226 |
+
{
|
47227 |
+
"epoch": 0.27276678763128365,
|
47228 |
+
"grad_norm": 2.8397490978240967,
|
47229 |
+
"learning_rate": 8.308649287242143e-05,
|
47230 |
+
"loss": 0.0854,
|
47231 |
+
"step": 6746
|
47232 |
+
},
|
47233 |
+
{
|
47234 |
+
"epoch": 0.2728072214865507,
|
47235 |
+
"grad_norm": 7.487560272216797,
|
47236 |
+
"learning_rate": 8.308171126055284e-05,
|
47237 |
+
"loss": 0.2085,
|
47238 |
+
"step": 6747
|
47239 |
+
},
|
47240 |
+
{
|
47241 |
+
"epoch": 0.2728476553418177,
|
47242 |
+
"grad_norm": 4.53800106048584,
|
47243 |
+
"learning_rate": 8.307692911050911e-05,
|
47244 |
+
"loss": 0.0882,
|
47245 |
+
"step": 6748
|
47246 |
+
},
|
47247 |
+
{
|
47248 |
+
"epoch": 0.27288808919708474,
|
47249 |
+
"grad_norm": 3.140773296356201,
|
47250 |
+
"learning_rate": 8.307214642236807e-05,
|
47251 |
+
"loss": 0.2199,
|
47252 |
+
"step": 6749
|
47253 |
+
},
|
47254 |
+
{
|
47255 |
+
"epoch": 0.2729285230523517,
|
47256 |
+
"grad_norm": 2.5138485431671143,
|
47257 |
+
"learning_rate": 8.30673631962075e-05,
|
47258 |
+
"loss": 0.0854,
|
47259 |
+
"step": 6750
|
47260 |
+
},
|
47261 |
+
{
|
47262 |
+
"epoch": 0.27296895690761874,
|
47263 |
+
"grad_norm": 6.800701141357422,
|
47264 |
+
"learning_rate": 8.306257943210519e-05,
|
47265 |
+
"loss": 0.3975,
|
47266 |
+
"step": 6751
|
47267 |
+
},
|
47268 |
+
{
|
47269 |
+
"epoch": 0.2730093907628858,
|
47270 |
+
"grad_norm": 6.718089580535889,
|
47271 |
+
"learning_rate": 8.3057795130139e-05,
|
47272 |
+
"loss": 0.3117,
|
47273 |
+
"step": 6752
|
47274 |
+
},
|
47275 |
+
{
|
47276 |
+
"epoch": 0.2730498246181528,
|
47277 |
+
"grad_norm": 6.603305816650391,
|
47278 |
+
"learning_rate": 8.305301029038675e-05,
|
47279 |
+
"loss": 0.2097,
|
47280 |
+
"step": 6753
|
47281 |
+
},
|
47282 |
+
{
|
47283 |
+
"epoch": 0.2730902584734198,
|
47284 |
+
"grad_norm": 2.7208642959594727,
|
47285 |
+
"learning_rate": 8.30482249129263e-05,
|
47286 |
+
"loss": 0.0939,
|
47287 |
+
"step": 6754
|
47288 |
+
},
|
47289 |
+
{
|
47290 |
+
"epoch": 0.2731306923286868,
|
47291 |
+
"grad_norm": 4.9741363525390625,
|
47292 |
+
"learning_rate": 8.304343899783546e-05,
|
47293 |
+
"loss": 0.2044,
|
47294 |
+
"step": 6755
|
47295 |
+
},
|
47296 |
+
{
|
47297 |
+
"epoch": 0.27317112618395384,
|
47298 |
+
"grad_norm": 3.9004909992218018,
|
47299 |
+
"learning_rate": 8.303865254519212e-05,
|
47300 |
+
"loss": 0.0972,
|
47301 |
+
"step": 6756
|
47302 |
+
},
|
47303 |
+
{
|
47304 |
+
"epoch": 0.2732115600392208,
|
47305 |
+
"grad_norm": 2.577651023864746,
|
47306 |
+
"learning_rate": 8.303386555507412e-05,
|
47307 |
+
"loss": 0.0375,
|
47308 |
+
"step": 6757
|
47309 |
+
},
|
47310 |
+
{
|
47311 |
+
"epoch": 0.27325199389448784,
|
47312 |
+
"grad_norm": 6.103005409240723,
|
47313 |
+
"learning_rate": 8.302907802755934e-05,
|
47314 |
+
"loss": 0.2076,
|
47315 |
+
"step": 6758
|
47316 |
+
},
|
47317 |
+
{
|
47318 |
+
"epoch": 0.2732924277497549,
|
47319 |
+
"grad_norm": 2.476658582687378,
|
47320 |
+
"learning_rate": 8.302428996272569e-05,
|
47321 |
+
"loss": 0.1518,
|
47322 |
+
"step": 6759
|
47323 |
+
},
|
47324 |
+
{
|
47325 |
+
"epoch": 0.2733328616050219,
|
47326 |
+
"grad_norm": 7.241556644439697,
|
47327 |
+
"learning_rate": 8.301950136065105e-05,
|
47328 |
+
"loss": 0.2085,
|
47329 |
+
"step": 6760
|
47330 |
+
},
|
47331 |
+
{
|
47332 |
+
"epoch": 0.2733732954602889,
|
47333 |
+
"grad_norm": 5.153772354125977,
|
47334 |
+
"learning_rate": 8.30147122214133e-05,
|
47335 |
+
"loss": 0.1336,
|
47336 |
+
"step": 6761
|
47337 |
+
},
|
47338 |
+
{
|
47339 |
+
"epoch": 0.2734137293155559,
|
47340 |
+
"grad_norm": 7.413361072540283,
|
47341 |
+
"learning_rate": 8.300992254509037e-05,
|
47342 |
+
"loss": 0.2148,
|
47343 |
+
"step": 6762
|
47344 |
+
},
|
47345 |
+
{
|
47346 |
+
"epoch": 0.27345416317082294,
|
47347 |
+
"grad_norm": 7.208649635314941,
|
47348 |
+
"learning_rate": 8.300513233176017e-05,
|
47349 |
+
"loss": 0.2475,
|
47350 |
+
"step": 6763
|
47351 |
+
},
|
47352 |
+
{
|
47353 |
+
"epoch": 0.27349459702608997,
|
47354 |
+
"grad_norm": 1.9269968271255493,
|
47355 |
+
"learning_rate": 8.300034158150065e-05,
|
47356 |
+
"loss": 0.0837,
|
47357 |
+
"step": 6764
|
47358 |
+
},
|
47359 |
+
{
|
47360 |
+
"epoch": 0.27353503088135694,
|
47361 |
+
"grad_norm": 8.421066284179688,
|
47362 |
+
"learning_rate": 8.29955502943897e-05,
|
47363 |
+
"loss": 0.2008,
|
47364 |
+
"step": 6765
|
47365 |
+
},
|
47366 |
+
{
|
47367 |
+
"epoch": 0.273575464736624,
|
47368 |
+
"grad_norm": 4.142081260681152,
|
47369 |
+
"learning_rate": 8.299075847050533e-05,
|
47370 |
+
"loss": 0.1124,
|
47371 |
+
"step": 6766
|
47372 |
+
},
|
47373 |
+
{
|
47374 |
+
"epoch": 0.273615898591891,
|
47375 |
+
"grad_norm": 4.070856094360352,
|
47376 |
+
"learning_rate": 8.298596610992542e-05,
|
47377 |
+
"loss": 0.2756,
|
47378 |
+
"step": 6767
|
47379 |
+
},
|
47380 |
+
{
|
47381 |
+
"epoch": 0.273656332447158,
|
47382 |
+
"grad_norm": 3.6061317920684814,
|
47383 |
+
"learning_rate": 8.298117321272798e-05,
|
47384 |
+
"loss": 0.146,
|
47385 |
+
"step": 6768
|
47386 |
+
},
|
47387 |
+
{
|
47388 |
+
"epoch": 0.273696766302425,
|
47389 |
+
"grad_norm": 3.447809934616089,
|
47390 |
+
"learning_rate": 8.297637977899097e-05,
|
47391 |
+
"loss": 0.1407,
|
47392 |
+
"step": 6769
|
47393 |
+
},
|
47394 |
+
{
|
47395 |
+
"epoch": 0.27373720015769204,
|
47396 |
+
"grad_norm": 6.205649375915527,
|
47397 |
+
"learning_rate": 8.297158580879239e-05,
|
47398 |
+
"loss": 0.1161,
|
47399 |
+
"step": 6770
|
47400 |
+
},
|
47401 |
+
{
|
47402 |
+
"epoch": 0.27377763401295907,
|
47403 |
+
"grad_norm": 2.858762264251709,
|
47404 |
+
"learning_rate": 8.296679130221019e-05,
|
47405 |
+
"loss": 0.1518,
|
47406 |
+
"step": 6771
|
47407 |
+
},
|
47408 |
+
{
|
47409 |
+
"epoch": 0.27381806786822604,
|
47410 |
+
"grad_norm": 4.097366809844971,
|
47411 |
+
"learning_rate": 8.296199625932239e-05,
|
47412 |
+
"loss": 0.1155,
|
47413 |
+
"step": 6772
|
47414 |
+
},
|
47415 |
+
{
|
47416 |
+
"epoch": 0.2738585017234931,
|
47417 |
+
"grad_norm": 2.412763833999634,
|
47418 |
+
"learning_rate": 8.295720068020697e-05,
|
47419 |
+
"loss": 0.0685,
|
47420 |
+
"step": 6773
|
47421 |
+
},
|
47422 |
+
{
|
47423 |
+
"epoch": 0.2738989355787601,
|
47424 |
+
"grad_norm": 6.983695030212402,
|
47425 |
+
"learning_rate": 8.295240456494201e-05,
|
47426 |
+
"loss": 0.2204,
|
47427 |
+
"step": 6774
|
47428 |
+
},
|
47429 |
+
{
|
47430 |
+
"epoch": 0.27393936943402714,
|
47431 |
+
"grad_norm": 5.8649139404296875,
|
47432 |
+
"learning_rate": 8.294760791360546e-05,
|
47433 |
+
"loss": 0.1488,
|
47434 |
+
"step": 6775
|
47435 |
+
},
|
47436 |
+
{
|
47437 |
+
"epoch": 0.2739798032892941,
|
47438 |
+
"grad_norm": 8.427542686462402,
|
47439 |
+
"learning_rate": 8.29428107262754e-05,
|
47440 |
+
"loss": 0.2546,
|
47441 |
+
"step": 6776
|
47442 |
+
},
|
47443 |
+
{
|
47444 |
+
"epoch": 0.27402023714456114,
|
47445 |
+
"grad_norm": 2.494324207305908,
|
47446 |
+
"learning_rate": 8.293801300302983e-05,
|
47447 |
+
"loss": 0.0721,
|
47448 |
+
"step": 6777
|
47449 |
+
},
|
47450 |
+
{
|
47451 |
+
"epoch": 0.27406067099982817,
|
47452 |
+
"grad_norm": 2.2018325328826904,
|
47453 |
+
"learning_rate": 8.293321474394684e-05,
|
47454 |
+
"loss": 0.0916,
|
47455 |
+
"step": 6778
|
47456 |
+
},
|
47457 |
+
{
|
47458 |
+
"epoch": 0.27410110485509515,
|
47459 |
+
"grad_norm": 4.509099006652832,
|
47460 |
+
"learning_rate": 8.292841594910448e-05,
|
47461 |
+
"loss": 0.0924,
|
47462 |
+
"step": 6779
|
47463 |
+
},
|
47464 |
+
{
|
47465 |
+
"epoch": 0.2741415387103622,
|
47466 |
+
"grad_norm": 4.540764331817627,
|
47467 |
+
"learning_rate": 8.292361661858079e-05,
|
47468 |
+
"loss": 0.1449,
|
47469 |
+
"step": 6780
|
47470 |
+
},
|
47471 |
+
{
|
47472 |
+
"epoch": 0.2741819725656292,
|
47473 |
+
"grad_norm": 3.1155524253845215,
|
47474 |
+
"learning_rate": 8.291881675245386e-05,
|
47475 |
+
"loss": 0.2028,
|
47476 |
+
"step": 6781
|
47477 |
+
},
|
47478 |
+
{
|
47479 |
+
"epoch": 0.27422240642089624,
|
47480 |
+
"grad_norm": 4.623219966888428,
|
47481 |
+
"learning_rate": 8.29140163508018e-05,
|
47482 |
+
"loss": 0.1246,
|
47483 |
+
"step": 6782
|
47484 |
+
},
|
47485 |
+
{
|
47486 |
+
"epoch": 0.2742628402761632,
|
47487 |
+
"grad_norm": 5.5894880294799805,
|
47488 |
+
"learning_rate": 8.290921541370267e-05,
|
47489 |
+
"loss": 0.2832,
|
47490 |
+
"step": 6783
|
47491 |
+
},
|
47492 |
+
{
|
47493 |
+
"epoch": 0.27430327413143024,
|
47494 |
+
"grad_norm": 3.8701984882354736,
|
47495 |
+
"learning_rate": 8.290441394123458e-05,
|
47496 |
+
"loss": 0.058,
|
47497 |
+
"step": 6784
|
47498 |
+
},
|
47499 |
+
{
|
47500 |
+
"epoch": 0.27434370798669727,
|
47501 |
+
"grad_norm": 3.939068078994751,
|
47502 |
+
"learning_rate": 8.289961193347565e-05,
|
47503 |
+
"loss": 0.181,
|
47504 |
+
"step": 6785
|
47505 |
+
},
|
47506 |
+
{
|
47507 |
+
"epoch": 0.2743841418419643,
|
47508 |
+
"grad_norm": 4.081961631774902,
|
47509 |
+
"learning_rate": 8.289480939050398e-05,
|
47510 |
+
"loss": 0.2096,
|
47511 |
+
"step": 6786
|
47512 |
+
},
|
47513 |
+
{
|
47514 |
+
"epoch": 0.2744245756972313,
|
47515 |
+
"grad_norm": 4.949100017547607,
|
47516 |
+
"learning_rate": 8.289000631239774e-05,
|
47517 |
+
"loss": 0.2057,
|
47518 |
+
"step": 6787
|
47519 |
+
},
|
47520 |
+
{
|
47521 |
+
"epoch": 0.2744650095524983,
|
47522 |
+
"grad_norm": 2.971660614013672,
|
47523 |
+
"learning_rate": 8.2885202699235e-05,
|
47524 |
+
"loss": 0.1002,
|
47525 |
+
"step": 6788
|
47526 |
+
},
|
47527 |
+
{
|
47528 |
+
"epoch": 0.27450544340776534,
|
47529 |
+
"grad_norm": 3.818432569503784,
|
47530 |
+
"learning_rate": 8.288039855109398e-05,
|
47531 |
+
"loss": 0.1002,
|
47532 |
+
"step": 6789
|
47533 |
+
},
|
47534 |
+
{
|
47535 |
+
"epoch": 0.2745458772630323,
|
47536 |
+
"grad_norm": 5.171977996826172,
|
47537 |
+
"learning_rate": 8.287559386805277e-05,
|
47538 |
+
"loss": 0.1415,
|
47539 |
+
"step": 6790
|
47540 |
+
},
|
47541 |
+
{
|
47542 |
+
"epoch": 0.27458631111829934,
|
47543 |
+
"grad_norm": 5.658943176269531,
|
47544 |
+
"learning_rate": 8.287078865018959e-05,
|
47545 |
+
"loss": 0.1659,
|
47546 |
+
"step": 6791
|
47547 |
+
},
|
47548 |
+
{
|
47549 |
+
"epoch": 0.2746267449735664,
|
47550 |
+
"grad_norm": 4.878620624542236,
|
47551 |
+
"learning_rate": 8.286598289758257e-05,
|
47552 |
+
"loss": 0.2983,
|
47553 |
+
"step": 6792
|
47554 |
+
},
|
47555 |
+
{
|
47556 |
+
"epoch": 0.2746671788288334,
|
47557 |
+
"grad_norm": 3.6901004314422607,
|
47558 |
+
"learning_rate": 8.28611766103099e-05,
|
47559 |
+
"loss": 0.111,
|
47560 |
+
"step": 6793
|
47561 |
+
},
|
47562 |
+
{
|
47563 |
+
"epoch": 0.2747076126841004,
|
47564 |
+
"grad_norm": 5.00291633605957,
|
47565 |
+
"learning_rate": 8.285636978844976e-05,
|
47566 |
+
"loss": 0.1408,
|
47567 |
+
"step": 6794
|
47568 |
+
},
|
47569 |
+
{
|
47570 |
+
"epoch": 0.2747480465393674,
|
47571 |
+
"grad_norm": 4.313044548034668,
|
47572 |
+
"learning_rate": 8.285156243208037e-05,
|
47573 |
+
"loss": 0.1819,
|
47574 |
+
"step": 6795
|
47575 |
+
},
|
47576 |
+
{
|
47577 |
+
"epoch": 0.27478848039463444,
|
47578 |
+
"grad_norm": 3.5659806728363037,
|
47579 |
+
"learning_rate": 8.284675454127992e-05,
|
47580 |
+
"loss": 0.1301,
|
47581 |
+
"step": 6796
|
47582 |
+
},
|
47583 |
+
{
|
47584 |
+
"epoch": 0.27482891424990147,
|
47585 |
+
"grad_norm": 4.71846342086792,
|
47586 |
+
"learning_rate": 8.284194611612663e-05,
|
47587 |
+
"loss": 0.153,
|
47588 |
+
"step": 6797
|
47589 |
+
},
|
47590 |
+
{
|
47591 |
+
"epoch": 0.27486934810516844,
|
47592 |
+
"grad_norm": 5.4654011726379395,
|
47593 |
+
"learning_rate": 8.283713715669874e-05,
|
47594 |
+
"loss": 0.1709,
|
47595 |
+
"step": 6798
|
47596 |
+
},
|
47597 |
+
{
|
47598 |
+
"epoch": 0.2749097819604355,
|
47599 |
+
"grad_norm": 2.5251972675323486,
|
47600 |
+
"learning_rate": 8.283232766307446e-05,
|
47601 |
+
"loss": 0.2577,
|
47602 |
+
"step": 6799
|
47603 |
+
},
|
47604 |
+
{
|
47605 |
+
"epoch": 0.2749502158157025,
|
47606 |
+
"grad_norm": 4.130078315734863,
|
47607 |
+
"learning_rate": 8.282751763533203e-05,
|
47608 |
+
"loss": 0.1687,
|
47609 |
+
"step": 6800
|
47610 |
+
},
|
47611 |
+
{
|
47612 |
+
"epoch": 0.2749906496709695,
|
47613 |
+
"grad_norm": 7.523174285888672,
|
47614 |
+
"learning_rate": 8.282270707354971e-05,
|
47615 |
+
"loss": 0.1373,
|
47616 |
+
"step": 6801
|
47617 |
+
},
|
47618 |
+
{
|
47619 |
+
"epoch": 0.2750310835262365,
|
47620 |
+
"grad_norm": 3.8236072063446045,
|
47621 |
+
"learning_rate": 8.281789597780576e-05,
|
47622 |
+
"loss": 0.1893,
|
47623 |
+
"step": 6802
|
47624 |
+
},
|
47625 |
+
{
|
47626 |
+
"epoch": 0.27507151738150354,
|
47627 |
+
"grad_norm": 5.089610576629639,
|
47628 |
+
"learning_rate": 8.281308434817845e-05,
|
47629 |
+
"loss": 0.1476,
|
47630 |
+
"step": 6803
|
47631 |
+
},
|
47632 |
+
{
|
47633 |
+
"epoch": 0.27511195123677057,
|
47634 |
+
"grad_norm": 2.454280138015747,
|
47635 |
+
"learning_rate": 8.280827218474605e-05,
|
47636 |
+
"loss": 0.1098,
|
47637 |
+
"step": 6804
|
47638 |
+
},
|
47639 |
+
{
|
47640 |
+
"epoch": 0.27515238509203754,
|
47641 |
+
"grad_norm": 5.247398376464844,
|
47642 |
+
"learning_rate": 8.280345948758683e-05,
|
47643 |
+
"loss": 0.1305,
|
47644 |
+
"step": 6805
|
47645 |
+
},
|
47646 |
+
{
|
47647 |
+
"epoch": 0.2751928189473046,
|
47648 |
+
"grad_norm": 3.7775826454162598,
|
47649 |
+
"learning_rate": 8.27986462567791e-05,
|
47650 |
+
"loss": 0.2421,
|
47651 |
+
"step": 6806
|
47652 |
+
},
|
47653 |
+
{
|
47654 |
+
"epoch": 0.2752332528025716,
|
47655 |
+
"grad_norm": 2.4471261501312256,
|
47656 |
+
"learning_rate": 8.279383249240115e-05,
|
47657 |
+
"loss": 0.1474,
|
47658 |
+
"step": 6807
|
47659 |
+
},
|
47660 |
+
{
|
47661 |
+
"epoch": 0.27527368665783863,
|
47662 |
+
"grad_norm": 3.33980655670166,
|
47663 |
+
"learning_rate": 8.278901819453133e-05,
|
47664 |
+
"loss": 0.086,
|
47665 |
+
"step": 6808
|
47666 |
+
},
|
47667 |
+
{
|
47668 |
+
"epoch": 0.2753141205131056,
|
47669 |
+
"grad_norm": 7.791358947753906,
|
47670 |
+
"learning_rate": 8.27842033632479e-05,
|
47671 |
+
"loss": 0.1993,
|
47672 |
+
"step": 6809
|
47673 |
+
},
|
47674 |
+
{
|
47675 |
+
"epoch": 0.27535455436837264,
|
47676 |
+
"grad_norm": 2.4375100135803223,
|
47677 |
+
"learning_rate": 8.277938799862923e-05,
|
47678 |
+
"loss": 0.112,
|
47679 |
+
"step": 6810
|
47680 |
+
},
|
47681 |
+
{
|
47682 |
+
"epoch": 0.27539498822363967,
|
47683 |
+
"grad_norm": 2.704415798187256,
|
47684 |
+
"learning_rate": 8.277457210075363e-05,
|
47685 |
+
"loss": 0.1114,
|
47686 |
+
"step": 6811
|
47687 |
+
},
|
47688 |
+
{
|
47689 |
+
"epoch": 0.27543542207890664,
|
47690 |
+
"grad_norm": 4.825018882751465,
|
47691 |
+
"learning_rate": 8.276975566969947e-05,
|
47692 |
+
"loss": 0.3218,
|
47693 |
+
"step": 6812
|
47694 |
+
},
|
47695 |
+
{
|
47696 |
+
"epoch": 0.2754758559341737,
|
47697 |
+
"grad_norm": 5.685189723968506,
|
47698 |
+
"learning_rate": 8.27649387055451e-05,
|
47699 |
+
"loss": 0.3668,
|
47700 |
+
"step": 6813
|
47701 |
+
},
|
47702 |
+
{
|
47703 |
+
"epoch": 0.2755162897894407,
|
47704 |
+
"grad_norm": 8.155264854431152,
|
47705 |
+
"learning_rate": 8.276012120836886e-05,
|
47706 |
+
"loss": 0.1866,
|
47707 |
+
"step": 6814
|
47708 |
+
},
|
47709 |
+
{
|
47710 |
+
"epoch": 0.27555672364470774,
|
47711 |
+
"grad_norm": 3.2562761306762695,
|
47712 |
+
"learning_rate": 8.275530317824914e-05,
|
47713 |
+
"loss": 0.1482,
|
47714 |
+
"step": 6815
|
47715 |
+
},
|
47716 |
+
{
|
47717 |
+
"epoch": 0.2755971574999747,
|
47718 |
+
"grad_norm": 3.1442646980285645,
|
47719 |
+
"learning_rate": 8.275048461526432e-05,
|
47720 |
+
"loss": 0.1576,
|
47721 |
+
"step": 6816
|
47722 |
+
},
|
47723 |
+
{
|
47724 |
+
"epoch": 0.27563759135524174,
|
47725 |
+
"grad_norm": 5.888988494873047,
|
47726 |
+
"learning_rate": 8.274566551949278e-05,
|
47727 |
+
"loss": 0.2234,
|
47728 |
+
"step": 6817
|
47729 |
+
},
|
47730 |
+
{
|
47731 |
+
"epoch": 0.27567802521050877,
|
47732 |
+
"grad_norm": 3.1819725036621094,
|
47733 |
+
"learning_rate": 8.274084589101291e-05,
|
47734 |
+
"loss": 0.1992,
|
47735 |
+
"step": 6818
|
47736 |
+
},
|
47737 |
+
{
|
47738 |
+
"epoch": 0.2757184590657758,
|
47739 |
+
"grad_norm": 2.676445960998535,
|
47740 |
+
"learning_rate": 8.273602572990315e-05,
|
47741 |
+
"loss": 0.0627,
|
47742 |
+
"step": 6819
|
47743 |
+
},
|
47744 |
+
{
|
47745 |
+
"epoch": 0.2757588929210428,
|
47746 |
+
"grad_norm": 4.076587677001953,
|
47747 |
+
"learning_rate": 8.27312050362419e-05,
|
47748 |
+
"loss": 0.1462,
|
47749 |
+
"step": 6820
|
47750 |
+
},
|
47751 |
+
{
|
47752 |
+
"epoch": 0.2757993267763098,
|
47753 |
+
"grad_norm": 3.8222618103027344,
|
47754 |
+
"learning_rate": 8.272638381010756e-05,
|
47755 |
+
"loss": 0.0729,
|
47756 |
+
"step": 6821
|
47757 |
+
},
|
47758 |
+
{
|
47759 |
+
"epoch": 0.27583976063157684,
|
47760 |
+
"grad_norm": 3.7292747497558594,
|
47761 |
+
"learning_rate": 8.272156205157859e-05,
|
47762 |
+
"loss": 0.1262,
|
47763 |
+
"step": 6822
|
47764 |
+
},
|
47765 |
+
{
|
47766 |
+
"epoch": 0.2758801944868438,
|
47767 |
+
"grad_norm": 4.968018054962158,
|
47768 |
+
"learning_rate": 8.271673976073341e-05,
|
47769 |
+
"loss": 0.1207,
|
47770 |
+
"step": 6823
|
47771 |
+
},
|
47772 |
+
{
|
47773 |
+
"epoch": 0.27592062834211084,
|
47774 |
+
"grad_norm": 2.4657115936279297,
|
47775 |
+
"learning_rate": 8.271191693765048e-05,
|
47776 |
+
"loss": 0.1591,
|
47777 |
+
"step": 6824
|
47778 |
+
},
|
47779 |
+
{
|
47780 |
+
"epoch": 0.27596106219737787,
|
47781 |
+
"grad_norm": 3.529189109802246,
|
47782 |
+
"learning_rate": 8.270709358240827e-05,
|
47783 |
+
"loss": 0.2575,
|
47784 |
+
"step": 6825
|
47785 |
+
},
|
47786 |
+
{
|
47787 |
+
"epoch": 0.2760014960526449,
|
47788 |
+
"grad_norm": 2.8450260162353516,
|
47789 |
+
"learning_rate": 8.270226969508522e-05,
|
47790 |
+
"loss": 0.1599,
|
47791 |
+
"step": 6826
|
47792 |
+
},
|
47793 |
+
{
|
47794 |
+
"epoch": 0.2760419299079119,
|
47795 |
+
"grad_norm": 4.889963626861572,
|
47796 |
+
"learning_rate": 8.26974452757598e-05,
|
47797 |
+
"loss": 0.1218,
|
47798 |
+
"step": 6827
|
47799 |
+
},
|
47800 |
+
{
|
47801 |
+
"epoch": 0.2760823637631789,
|
47802 |
+
"grad_norm": 3.2308337688446045,
|
47803 |
+
"learning_rate": 8.269262032451052e-05,
|
47804 |
+
"loss": 0.1067,
|
47805 |
+
"step": 6828
|
47806 |
+
},
|
47807 |
+
{
|
47808 |
+
"epoch": 0.27612279761844594,
|
47809 |
+
"grad_norm": 2.841602325439453,
|
47810 |
+
"learning_rate": 8.268779484141589e-05,
|
47811 |
+
"loss": 0.0943,
|
47812 |
+
"step": 6829
|
47813 |
+
},
|
47814 |
+
{
|
47815 |
+
"epoch": 0.27616323147371297,
|
47816 |
+
"grad_norm": 8.194686889648438,
|
47817 |
+
"learning_rate": 8.268296882655436e-05,
|
47818 |
+
"loss": 0.1987,
|
47819 |
+
"step": 6830
|
47820 |
+
},
|
47821 |
+
{
|
47822 |
+
"epoch": 0.27620366532897994,
|
47823 |
+
"grad_norm": 3.6506125926971436,
|
47824 |
+
"learning_rate": 8.267814228000449e-05,
|
47825 |
+
"loss": 0.1456,
|
47826 |
+
"step": 6831
|
47827 |
+
},
|
47828 |
+
{
|
47829 |
+
"epoch": 0.27624409918424697,
|
47830 |
+
"grad_norm": 3.0571060180664062,
|
47831 |
+
"learning_rate": 8.267331520184475e-05,
|
47832 |
+
"loss": 0.1958,
|
47833 |
+
"step": 6832
|
47834 |
+
},
|
47835 |
+
{
|
47836 |
+
"epoch": 0.276284533039514,
|
47837 |
+
"grad_norm": 4.035765171051025,
|
47838 |
+
"learning_rate": 8.266848759215371e-05,
|
47839 |
+
"loss": 0.2257,
|
47840 |
+
"step": 6833
|
47841 |
+
},
|
47842 |
+
{
|
47843 |
+
"epoch": 0.276324966894781,
|
47844 |
+
"grad_norm": 5.233249664306641,
|
47845 |
+
"learning_rate": 8.266365945100986e-05,
|
47846 |
+
"loss": 0.1038,
|
47847 |
+
"step": 6834
|
47848 |
+
},
|
47849 |
+
{
|
47850 |
+
"epoch": 0.276365400750048,
|
47851 |
+
"grad_norm": 3.7331199645996094,
|
47852 |
+
"learning_rate": 8.26588307784918e-05,
|
47853 |
+
"loss": 0.1807,
|
47854 |
+
"step": 6835
|
47855 |
+
},
|
47856 |
+
{
|
47857 |
+
"epoch": 0.27640583460531504,
|
47858 |
+
"grad_norm": 4.250802516937256,
|
47859 |
+
"learning_rate": 8.265400157467805e-05,
|
47860 |
+
"loss": 0.2145,
|
47861 |
+
"step": 6836
|
47862 |
+
},
|
47863 |
+
{
|
47864 |
+
"epoch": 0.27644626846058207,
|
47865 |
+
"grad_norm": 3.114961862564087,
|
47866 |
+
"learning_rate": 8.264917183964718e-05,
|
47867 |
+
"loss": 0.1627,
|
47868 |
+
"step": 6837
|
47869 |
+
},
|
47870 |
+
{
|
47871 |
+
"epoch": 0.27648670231584904,
|
47872 |
+
"grad_norm": 4.02069616317749,
|
47873 |
+
"learning_rate": 8.264434157347774e-05,
|
47874 |
+
"loss": 0.056,
|
47875 |
+
"step": 6838
|
47876 |
+
},
|
47877 |
+
{
|
47878 |
+
"epoch": 0.2765271361711161,
|
47879 |
+
"grad_norm": 5.284739971160889,
|
47880 |
+
"learning_rate": 8.263951077624834e-05,
|
47881 |
+
"loss": 0.2062,
|
47882 |
+
"step": 6839
|
47883 |
+
},
|
47884 |
+
{
|
47885 |
+
"epoch": 0.2765675700263831,
|
47886 |
+
"grad_norm": 3.5999057292938232,
|
47887 |
+
"learning_rate": 8.263467944803756e-05,
|
47888 |
+
"loss": 0.1062,
|
47889 |
+
"step": 6840
|
47890 |
+
},
|
47891 |
+
{
|
47892 |
+
"epoch": 0.27660800388165013,
|
47893 |
+
"grad_norm": 5.171694755554199,
|
47894 |
+
"learning_rate": 8.262984758892398e-05,
|
47895 |
+
"loss": 0.1012,
|
47896 |
+
"step": 6841
|
47897 |
+
},
|
47898 |
+
{
|
47899 |
+
"epoch": 0.2766484377369171,
|
47900 |
+
"grad_norm": 7.84243106842041,
|
47901 |
+
"learning_rate": 8.26250151989862e-05,
|
47902 |
+
"loss": 0.2908,
|
47903 |
+
"step": 6842
|
47904 |
+
},
|
47905 |
+
{
|
47906 |
+
"epoch": 0.27668887159218414,
|
47907 |
+
"grad_norm": 11.819588661193848,
|
47908 |
+
"learning_rate": 8.262018227830286e-05,
|
47909 |
+
"loss": 0.2406,
|
47910 |
+
"step": 6843
|
47911 |
+
},
|
47912 |
+
{
|
47913 |
+
"epoch": 0.27672930544745117,
|
47914 |
+
"grad_norm": 3.6789815425872803,
|
47915 |
+
"learning_rate": 8.261534882695259e-05,
|
47916 |
+
"loss": 0.22,
|
47917 |
+
"step": 6844
|
47918 |
+
},
|
47919 |
+
{
|
47920 |
+
"epoch": 0.27676973930271814,
|
47921 |
+
"grad_norm": 4.107539176940918,
|
47922 |
+
"learning_rate": 8.261051484501398e-05,
|
47923 |
+
"loss": 0.2088,
|
47924 |
+
"step": 6845
|
47925 |
+
},
|
47926 |
+
{
|
47927 |
+
"epoch": 0.2768101731579852,
|
47928 |
+
"grad_norm": 3.6334245204925537,
|
47929 |
+
"learning_rate": 8.260568033256569e-05,
|
47930 |
+
"loss": 0.2398,
|
47931 |
+
"step": 6846
|
47932 |
+
},
|
47933 |
+
{
|
47934 |
+
"epoch": 0.2768506070132522,
|
47935 |
+
"grad_norm": 2.7900993824005127,
|
47936 |
+
"learning_rate": 8.260084528968638e-05,
|
47937 |
+
"loss": 0.0468,
|
47938 |
+
"step": 6847
|
47939 |
+
},
|
47940 |
+
{
|
47941 |
+
"epoch": 0.27689104086851923,
|
47942 |
+
"grad_norm": 3.0662593841552734,
|
47943 |
+
"learning_rate": 8.25960097164547e-05,
|
47944 |
+
"loss": 0.0855,
|
47945 |
+
"step": 6848
|
47946 |
+
},
|
47947 |
+
{
|
47948 |
+
"epoch": 0.2769314747237862,
|
47949 |
+
"grad_norm": 3.9684951305389404,
|
47950 |
+
"learning_rate": 8.25911736129493e-05,
|
47951 |
+
"loss": 0.1374,
|
47952 |
+
"step": 6849
|
47953 |
+
},
|
47954 |
+
{
|
47955 |
+
"epoch": 0.27697190857905324,
|
47956 |
+
"grad_norm": 7.611025333404541,
|
47957 |
+
"learning_rate": 8.258633697924887e-05,
|
47958 |
+
"loss": 0.2211,
|
47959 |
+
"step": 6850
|
47960 |
+
},
|
47961 |
+
{
|
47962 |
+
"epoch": 0.27701234243432027,
|
47963 |
+
"grad_norm": 8.749755859375,
|
47964 |
+
"learning_rate": 8.258149981543209e-05,
|
47965 |
+
"loss": 0.2505,
|
47966 |
+
"step": 6851
|
47967 |
+
},
|
47968 |
+
{
|
47969 |
+
"epoch": 0.27705277628958724,
|
47970 |
+
"grad_norm": 5.074515342712402,
|
47971 |
+
"learning_rate": 8.257666212157764e-05,
|
47972 |
+
"loss": 0.1747,
|
47973 |
+
"step": 6852
|
47974 |
+
},
|
47975 |
+
{
|
47976 |
+
"epoch": 0.2770932101448543,
|
47977 |
+
"grad_norm": 1.7904990911483765,
|
47978 |
+
"learning_rate": 8.257182389776423e-05,
|
47979 |
+
"loss": 0.1079,
|
47980 |
+
"step": 6853
|
47981 |
+
},
|
47982 |
+
{
|
47983 |
+
"epoch": 0.2771336440001213,
|
47984 |
+
"grad_norm": 5.827474594116211,
|
47985 |
+
"learning_rate": 8.256698514407058e-05,
|
47986 |
+
"loss": 0.2201,
|
47987 |
+
"step": 6854
|
47988 |
+
},
|
47989 |
+
{
|
47990 |
+
"epoch": 0.27717407785538833,
|
47991 |
+
"grad_norm": 4.419424057006836,
|
47992 |
+
"learning_rate": 8.256214586057538e-05,
|
47993 |
+
"loss": 0.1586,
|
47994 |
+
"step": 6855
|
47995 |
+
},
|
47996 |
+
{
|
47997 |
+
"epoch": 0.2772145117106553,
|
47998 |
+
"grad_norm": 2.628919839859009,
|
47999 |
+
"learning_rate": 8.255730604735738e-05,
|
48000 |
+
"loss": 0.1646,
|
48001 |
+
"step": 6856
|
48002 |
+
},
|
48003 |
+
{
|
48004 |
+
"epoch": 0.27725494556592234,
|
48005 |
+
"grad_norm": 5.962108612060547,
|
48006 |
+
"learning_rate": 8.25524657044953e-05,
|
48007 |
+
"loss": 0.1467,
|
48008 |
+
"step": 6857
|
48009 |
+
},
|
48010 |
+
{
|
48011 |
+
"epoch": 0.27729537942118937,
|
48012 |
+
"grad_norm": 5.0021653175354,
|
48013 |
+
"learning_rate": 8.25476248320679e-05,
|
48014 |
+
"loss": 0.1436,
|
48015 |
+
"step": 6858
|
48016 |
+
},
|
48017 |
+
{
|
48018 |
+
"epoch": 0.2773358132764564,
|
48019 |
+
"grad_norm": 1.9597249031066895,
|
48020 |
+
"learning_rate": 8.254278343015392e-05,
|
48021 |
+
"loss": 0.0489,
|
48022 |
+
"step": 6859
|
48023 |
+
},
|
48024 |
+
{
|
48025 |
+
"epoch": 0.2773762471317234,
|
48026 |
+
"grad_norm": 3.661406993865967,
|
48027 |
+
"learning_rate": 8.25379414988321e-05,
|
48028 |
+
"loss": 0.078,
|
48029 |
+
"step": 6860
|
48030 |
+
},
|
48031 |
+
{
|
48032 |
+
"epoch": 0.2774166809869904,
|
48033 |
+
"grad_norm": 4.2363739013671875,
|
48034 |
+
"learning_rate": 8.253309903818125e-05,
|
48035 |
+
"loss": 0.1455,
|
48036 |
+
"step": 6861
|
48037 |
+
},
|
48038 |
+
{
|
48039 |
+
"epoch": 0.27745711484225744,
|
48040 |
+
"grad_norm": 8.70430850982666,
|
48041 |
+
"learning_rate": 8.25282560482801e-05,
|
48042 |
+
"loss": 0.1908,
|
48043 |
+
"step": 6862
|
48044 |
+
},
|
48045 |
+
{
|
48046 |
+
"epoch": 0.2774975486975244,
|
48047 |
+
"grad_norm": 4.983016014099121,
|
48048 |
+
"learning_rate": 8.252341252920749e-05,
|
48049 |
+
"loss": 0.1425,
|
48050 |
+
"step": 6863
|
48051 |
+
},
|
48052 |
+
{
|
48053 |
+
"epoch": 0.27753798255279144,
|
48054 |
+
"grad_norm": 6.587481498718262,
|
48055 |
+
"learning_rate": 8.251856848104217e-05,
|
48056 |
+
"loss": 0.2777,
|
48057 |
+
"step": 6864
|
48058 |
+
},
|
48059 |
+
{
|
48060 |
+
"epoch": 0.27757841640805847,
|
48061 |
+
"grad_norm": 6.220609188079834,
|
48062 |
+
"learning_rate": 8.251372390386296e-05,
|
48063 |
+
"loss": 0.1345,
|
48064 |
+
"step": 6865
|
48065 |
+
},
|
48066 |
+
{
|
48067 |
+
"epoch": 0.2776188502633255,
|
48068 |
+
"grad_norm": 4.256330966949463,
|
48069 |
+
"learning_rate": 8.250887879774867e-05,
|
48070 |
+
"loss": 0.1311,
|
48071 |
+
"step": 6866
|
48072 |
+
},
|
48073 |
+
{
|
48074 |
+
"epoch": 0.2776592841185925,
|
48075 |
+
"grad_norm": 4.311790943145752,
|
48076 |
+
"learning_rate": 8.250403316277813e-05,
|
48077 |
+
"loss": 0.1013,
|
48078 |
+
"step": 6867
|
48079 |
+
},
|
48080 |
+
{
|
48081 |
+
"epoch": 0.2776997179738595,
|
48082 |
+
"grad_norm": 7.499460220336914,
|
48083 |
+
"learning_rate": 8.249918699903016e-05,
|
48084 |
+
"loss": 0.2482,
|
48085 |
+
"step": 6868
|
48086 |
+
},
|
48087 |
+
{
|
48088 |
+
"epoch": 0.27774015182912654,
|
48089 |
+
"grad_norm": 5.5789031982421875,
|
48090 |
+
"learning_rate": 8.249434030658361e-05,
|
48091 |
+
"loss": 0.23,
|
48092 |
+
"step": 6869
|
48093 |
+
},
|
48094 |
+
{
|
48095 |
+
"epoch": 0.27778058568439357,
|
48096 |
+
"grad_norm": 4.808448314666748,
|
48097 |
+
"learning_rate": 8.24894930855173e-05,
|
48098 |
+
"loss": 0.1793,
|
48099 |
+
"step": 6870
|
48100 |
+
},
|
48101 |
+
{
|
48102 |
+
"epoch": 0.27782101953966054,
|
48103 |
+
"grad_norm": 4.648429870605469,
|
48104 |
+
"learning_rate": 8.24846453359101e-05,
|
48105 |
+
"loss": 0.3463,
|
48106 |
+
"step": 6871
|
48107 |
+
},
|
48108 |
+
{
|
48109 |
+
"epoch": 0.27786145339492757,
|
48110 |
+
"grad_norm": 7.050698280334473,
|
48111 |
+
"learning_rate": 8.247979705784087e-05,
|
48112 |
+
"loss": 0.1316,
|
48113 |
+
"step": 6872
|
48114 |
+
},
|
48115 |
+
{
|
48116 |
+
"epoch": 0.2779018872501946,
|
48117 |
+
"grad_norm": 2.8454396724700928,
|
48118 |
+
"learning_rate": 8.247494825138849e-05,
|
48119 |
+
"loss": 0.185,
|
48120 |
+
"step": 6873
|
48121 |
+
},
|
48122 |
+
{
|
48123 |
+
"epoch": 0.2779423211054616,
|
48124 |
+
"grad_norm": 4.8122663497924805,
|
48125 |
+
"learning_rate": 8.247009891663185e-05,
|
48126 |
+
"loss": 0.2494,
|
48127 |
+
"step": 6874
|
48128 |
+
},
|
48129 |
+
{
|
48130 |
+
"epoch": 0.2779827549607286,
|
48131 |
+
"grad_norm": 7.622976303100586,
|
48132 |
+
"learning_rate": 8.24652490536498e-05,
|
48133 |
+
"loss": 0.2472,
|
48134 |
+
"step": 6875
|
48135 |
+
},
|
48136 |
+
{
|
48137 |
+
"epoch": 0.27802318881599564,
|
48138 |
+
"grad_norm": 2.229227066040039,
|
48139 |
+
"learning_rate": 8.246039866252128e-05,
|
48140 |
+
"loss": 0.0868,
|
48141 |
+
"step": 6876
|
48142 |
+
},
|
48143 |
+
{
|
48144 |
+
"epoch": 0.27806362267126267,
|
48145 |
+
"grad_norm": 6.964671611785889,
|
48146 |
+
"learning_rate": 8.245554774332518e-05,
|
48147 |
+
"loss": 0.2234,
|
48148 |
+
"step": 6877
|
48149 |
+
},
|
48150 |
+
{
|
48151 |
+
"epoch": 0.27810405652652964,
|
48152 |
+
"grad_norm": 3.054685354232788,
|
48153 |
+
"learning_rate": 8.24506962961404e-05,
|
48154 |
+
"loss": 0.1224,
|
48155 |
+
"step": 6878
|
48156 |
+
},
|
48157 |
+
{
|
48158 |
+
"epoch": 0.27814449038179667,
|
48159 |
+
"grad_norm": 3.3033480644226074,
|
48160 |
+
"learning_rate": 8.24458443210459e-05,
|
48161 |
+
"loss": 0.0781,
|
48162 |
+
"step": 6879
|
48163 |
+
},
|
48164 |
+
{
|
48165 |
+
"epoch": 0.2781849242370637,
|
48166 |
+
"grad_norm": 7.184120178222656,
|
48167 |
+
"learning_rate": 8.244099181812057e-05,
|
48168 |
+
"loss": 0.2011,
|
48169 |
+
"step": 6880
|
48170 |
+
},
|
48171 |
+
{
|
48172 |
+
"epoch": 0.27822535809233073,
|
48173 |
+
"grad_norm": 2.8741977214813232,
|
48174 |
+
"learning_rate": 8.243613878744338e-05,
|
48175 |
+
"loss": 0.145,
|
48176 |
+
"step": 6881
|
48177 |
+
},
|
48178 |
+
{
|
48179 |
+
"epoch": 0.2782657919475977,
|
48180 |
+
"grad_norm": 2.2241430282592773,
|
48181 |
+
"learning_rate": 8.243128522909327e-05,
|
48182 |
+
"loss": 0.2211,
|
48183 |
+
"step": 6882
|
48184 |
+
},
|
48185 |
+
{
|
48186 |
+
"epoch": 0.27830622580286474,
|
48187 |
+
"grad_norm": 2.268054723739624,
|
48188 |
+
"learning_rate": 8.24264311431492e-05,
|
48189 |
+
"loss": 0.0718,
|
48190 |
+
"step": 6883
|
48191 |
+
},
|
48192 |
+
{
|
48193 |
+
"epoch": 0.27834665965813177,
|
48194 |
+
"grad_norm": 4.932427883148193,
|
48195 |
+
"learning_rate": 8.242157652969013e-05,
|
48196 |
+
"loss": 0.1329,
|
48197 |
+
"step": 6884
|
48198 |
+
},
|
48199 |
+
{
|
48200 |
+
"epoch": 0.27838709351339874,
|
48201 |
+
"grad_norm": 3.135193347930908,
|
48202 |
+
"learning_rate": 8.241672138879506e-05,
|
48203 |
+
"loss": 0.1773,
|
48204 |
+
"step": 6885
|
48205 |
+
},
|
48206 |
+
{
|
48207 |
+
"epoch": 0.2784275273686658,
|
48208 |
+
"grad_norm": 3.5474231243133545,
|
48209 |
+
"learning_rate": 8.241186572054294e-05,
|
48210 |
+
"loss": 0.1244,
|
48211 |
+
"step": 6886
|
48212 |
+
},
|
48213 |
+
{
|
48214 |
+
"epoch": 0.2784679612239328,
|
48215 |
+
"grad_norm": 5.249092102050781,
|
48216 |
+
"learning_rate": 8.240700952501276e-05,
|
48217 |
+
"loss": 0.1787,
|
48218 |
+
"step": 6887
|
48219 |
+
},
|
48220 |
+
{
|
48221 |
+
"epoch": 0.27850839507919983,
|
48222 |
+
"grad_norm": 2.849830150604248,
|
48223 |
+
"learning_rate": 8.240215280228356e-05,
|
48224 |
+
"loss": 0.1976,
|
48225 |
+
"step": 6888
|
48226 |
+
},
|
48227 |
+
{
|
48228 |
+
"epoch": 0.2785488289344668,
|
48229 |
+
"grad_norm": 3.831932783126831,
|
48230 |
+
"learning_rate": 8.239729555243432e-05,
|
48231 |
+
"loss": 0.1508,
|
48232 |
+
"step": 6889
|
48233 |
+
},
|
48234 |
+
{
|
48235 |
+
"epoch": 0.27858926278973384,
|
48236 |
+
"grad_norm": 5.670083045959473,
|
48237 |
+
"learning_rate": 8.239243777554407e-05,
|
48238 |
+
"loss": 0.1866,
|
48239 |
+
"step": 6890
|
48240 |
+
},
|
48241 |
+
{
|
48242 |
+
"epoch": 0.27862969664500087,
|
48243 |
+
"grad_norm": 3.373183488845825,
|
48244 |
+
"learning_rate": 8.238757947169182e-05,
|
48245 |
+
"loss": 0.0999,
|
48246 |
+
"step": 6891
|
48247 |
+
},
|
48248 |
+
{
|
48249 |
+
"epoch": 0.2786701305002679,
|
48250 |
+
"grad_norm": 4.232305526733398,
|
48251 |
+
"learning_rate": 8.238272064095663e-05,
|
48252 |
+
"loss": 0.2286,
|
48253 |
+
"step": 6892
|
48254 |
+
},
|
48255 |
+
{
|
48256 |
+
"epoch": 0.2787105643555349,
|
48257 |
+
"grad_norm": 8.783098220825195,
|
48258 |
+
"learning_rate": 8.237786128341751e-05,
|
48259 |
+
"loss": 0.2215,
|
48260 |
+
"step": 6893
|
48261 |
+
},
|
48262 |
+
{
|
48263 |
+
"epoch": 0.2787509982108019,
|
48264 |
+
"grad_norm": 6.25701379776001,
|
48265 |
+
"learning_rate": 8.237300139915353e-05,
|
48266 |
+
"loss": 0.347,
|
48267 |
+
"step": 6894
|
48268 |
+
},
|
48269 |
+
{
|
48270 |
+
"epoch": 0.27879143206606893,
|
48271 |
+
"grad_norm": 4.344135284423828,
|
48272 |
+
"learning_rate": 8.236814098824377e-05,
|
48273 |
+
"loss": 0.1901,
|
48274 |
+
"step": 6895
|
48275 |
+
},
|
48276 |
+
{
|
48277 |
+
"epoch": 0.2788318659213359,
|
48278 |
+
"grad_norm": 5.6254496574401855,
|
48279 |
+
"learning_rate": 8.236328005076728e-05,
|
48280 |
+
"loss": 0.2202,
|
48281 |
+
"step": 6896
|
48282 |
+
},
|
48283 |
+
{
|
48284 |
+
"epoch": 0.27887229977660294,
|
48285 |
+
"grad_norm": 4.75222635269165,
|
48286 |
+
"learning_rate": 8.235841858680316e-05,
|
48287 |
+
"loss": 0.2646,
|
48288 |
+
"step": 6897
|
48289 |
+
},
|
48290 |
+
{
|
48291 |
+
"epoch": 0.27891273363186997,
|
48292 |
+
"grad_norm": 3.4938650131225586,
|
48293 |
+
"learning_rate": 8.235355659643045e-05,
|
48294 |
+
"loss": 0.1816,
|
48295 |
+
"step": 6898
|
48296 |
+
},
|
48297 |
+
{
|
48298 |
+
"epoch": 0.278953167487137,
|
48299 |
+
"grad_norm": 5.431236267089844,
|
48300 |
+
"learning_rate": 8.234869407972827e-05,
|
48301 |
+
"loss": 0.192,
|
48302 |
+
"step": 6899
|
48303 |
+
},
|
48304 |
+
{
|
48305 |
+
"epoch": 0.278993601342404,
|
48306 |
+
"grad_norm": 3.9821629524230957,
|
48307 |
+
"learning_rate": 8.234383103677574e-05,
|
48308 |
+
"loss": 0.1342,
|
48309 |
+
"step": 6900
|
48310 |
+
},
|
48311 |
+
{
|
48312 |
+
"epoch": 0.279034035197671,
|
48313 |
+
"grad_norm": 9.195260047912598,
|
48314 |
+
"learning_rate": 8.233896746765195e-05,
|
48315 |
+
"loss": 0.2889,
|
48316 |
+
"step": 6901
|
48317 |
+
},
|
48318 |
+
{
|
48319 |
+
"epoch": 0.27907446905293803,
|
48320 |
+
"grad_norm": 5.694711208343506,
|
48321 |
+
"learning_rate": 8.233410337243603e-05,
|
48322 |
+
"loss": 0.1625,
|
48323 |
+
"step": 6902
|
48324 |
+
},
|
48325 |
+
{
|
48326 |
+
"epoch": 0.27911490290820506,
|
48327 |
+
"grad_norm": 5.604170322418213,
|
48328 |
+
"learning_rate": 8.232923875120712e-05,
|
48329 |
+
"loss": 0.2108,
|
48330 |
+
"step": 6903
|
48331 |
+
},
|
48332 |
+
{
|
48333 |
+
"epoch": 0.27915533676347204,
|
48334 |
+
"grad_norm": 7.784795761108398,
|
48335 |
+
"learning_rate": 8.232437360404434e-05,
|
48336 |
+
"loss": 0.2528,
|
48337 |
+
"step": 6904
|
48338 |
+
},
|
48339 |
+
{
|
48340 |
+
"epoch": 0.27919577061873907,
|
48341 |
+
"grad_norm": 5.980737209320068,
|
48342 |
+
"learning_rate": 8.231950793102682e-05,
|
48343 |
+
"loss": 0.2172,
|
48344 |
+
"step": 6905
|
48345 |
+
},
|
48346 |
+
{
|
48347 |
+
"epoch": 0.2792362044740061,
|
48348 |
+
"grad_norm": 6.357841968536377,
|
48349 |
+
"learning_rate": 8.231464173223377e-05,
|
48350 |
+
"loss": 0.1662,
|
48351 |
+
"step": 6906
|
48352 |
+
},
|
48353 |
+
{
|
48354 |
+
"epoch": 0.2792766383292731,
|
48355 |
+
"grad_norm": 6.182690620422363,
|
48356 |
+
"learning_rate": 8.230977500774431e-05,
|
48357 |
+
"loss": 0.1997,
|
48358 |
+
"step": 6907
|
48359 |
+
},
|
48360 |
+
{
|
48361 |
+
"epoch": 0.2793170721845401,
|
48362 |
+
"grad_norm": 3.8555455207824707,
|
48363 |
+
"learning_rate": 8.230490775763764e-05,
|
48364 |
+
"loss": 0.292,
|
48365 |
+
"step": 6908
|
48366 |
+
},
|
48367 |
+
{
|
48368 |
+
"epoch": 0.27935750603980714,
|
48369 |
+
"grad_norm": 4.3104095458984375,
|
48370 |
+
"learning_rate": 8.230003998199289e-05,
|
48371 |
+
"loss": 0.1915,
|
48372 |
+
"step": 6909
|
48373 |
+
},
|
48374 |
+
{
|
48375 |
+
"epoch": 0.27939793989507417,
|
48376 |
+
"grad_norm": 3.046781063079834,
|
48377 |
+
"learning_rate": 8.229517168088931e-05,
|
48378 |
+
"loss": 0.1294,
|
48379 |
+
"step": 6910
|
48380 |
+
},
|
48381 |
+
{
|
48382 |
+
"epoch": 0.27943837375034114,
|
48383 |
+
"grad_norm": 3.3846945762634277,
|
48384 |
+
"learning_rate": 8.229030285440606e-05,
|
48385 |
+
"loss": 0.2266,
|
48386 |
+
"step": 6911
|
48387 |
+
},
|
48388 |
+
{
|
48389 |
+
"epoch": 0.27947880760560817,
|
48390 |
+
"grad_norm": 4.972047805786133,
|
48391 |
+
"learning_rate": 8.228543350262236e-05,
|
48392 |
+
"loss": 0.1336,
|
48393 |
+
"step": 6912
|
48394 |
+
},
|
48395 |
+
{
|
48396 |
+
"epoch": 0.2795192414608752,
|
48397 |
+
"grad_norm": 2.4246654510498047,
|
48398 |
+
"learning_rate": 8.228056362561743e-05,
|
48399 |
+
"loss": 0.1141,
|
48400 |
+
"step": 6913
|
48401 |
+
},
|
48402 |
+
{
|
48403 |
+
"epoch": 0.27955967531614223,
|
48404 |
+
"grad_norm": 3.0253028869628906,
|
48405 |
+
"learning_rate": 8.227569322347048e-05,
|
48406 |
+
"loss": 0.1536,
|
48407 |
+
"step": 6914
|
48408 |
+
},
|
48409 |
+
{
|
48410 |
+
"epoch": 0.2796001091714092,
|
48411 |
+
"grad_norm": 8.703618049621582,
|
48412 |
+
"learning_rate": 8.227082229626076e-05,
|
48413 |
+
"loss": 0.1645,
|
48414 |
+
"step": 6915
|
48415 |
+
},
|
48416 |
+
{
|
48417 |
+
"epoch": 0.27964054302667624,
|
48418 |
+
"grad_norm": 3.3416714668273926,
|
48419 |
+
"learning_rate": 8.226595084406748e-05,
|
48420 |
+
"loss": 0.2456,
|
48421 |
+
"step": 6916
|
48422 |
+
},
|
48423 |
+
{
|
48424 |
+
"epoch": 0.27968097688194327,
|
48425 |
+
"grad_norm": 5.402046203613281,
|
48426 |
+
"learning_rate": 8.226107886696992e-05,
|
48427 |
+
"loss": 0.1841,
|
48428 |
+
"step": 6917
|
48429 |
+
},
|
48430 |
+
{
|
48431 |
+
"epoch": 0.27972141073721024,
|
48432 |
+
"grad_norm": 3.5431480407714844,
|
48433 |
+
"learning_rate": 8.225620636504732e-05,
|
48434 |
+
"loss": 0.1162,
|
48435 |
+
"step": 6918
|
48436 |
+
},
|
48437 |
+
{
|
48438 |
+
"epoch": 0.27976184459247727,
|
48439 |
+
"grad_norm": 2.7912280559539795,
|
48440 |
+
"learning_rate": 8.225133333837895e-05,
|
48441 |
+
"loss": 0.2295,
|
48442 |
+
"step": 6919
|
48443 |
+
},
|
48444 |
+
{
|
48445 |
+
"epoch": 0.2798022784477443,
|
48446 |
+
"grad_norm": 5.85484504699707,
|
48447 |
+
"learning_rate": 8.224645978704409e-05,
|
48448 |
+
"loss": 0.3039,
|
48449 |
+
"step": 6920
|
48450 |
+
},
|
48451 |
+
{
|
48452 |
+
"epoch": 0.27984271230301133,
|
48453 |
+
"grad_norm": 3.1748459339141846,
|
48454 |
+
"learning_rate": 8.2241585711122e-05,
|
48455 |
+
"loss": 0.1467,
|
48456 |
+
"step": 6921
|
48457 |
+
},
|
48458 |
+
{
|
48459 |
+
"epoch": 0.2798831461582783,
|
48460 |
+
"grad_norm": 4.09371280670166,
|
48461 |
+
"learning_rate": 8.223671111069201e-05,
|
48462 |
+
"loss": 0.1997,
|
48463 |
+
"step": 6922
|
48464 |
+
},
|
48465 |
+
{
|
48466 |
+
"epoch": 0.27992358001354534,
|
48467 |
+
"grad_norm": 2.968587875366211,
|
48468 |
+
"learning_rate": 8.223183598583341e-05,
|
48469 |
+
"loss": 0.199,
|
48470 |
+
"step": 6923
|
48471 |
+
},
|
48472 |
+
{
|
48473 |
+
"epoch": 0.27996401386881237,
|
48474 |
+
"grad_norm": 5.885176658630371,
|
48475 |
+
"learning_rate": 8.222696033662548e-05,
|
48476 |
+
"loss": 0.1386,
|
48477 |
+
"step": 6924
|
48478 |
+
},
|
48479 |
+
{
|
48480 |
+
"epoch": 0.2800044477240794,
|
48481 |
+
"grad_norm": 4.820009231567383,
|
48482 |
+
"learning_rate": 8.222208416314756e-05,
|
48483 |
+
"loss": 0.1913,
|
48484 |
+
"step": 6925
|
48485 |
+
},
|
48486 |
+
{
|
48487 |
+
"epoch": 0.28004488157934637,
|
48488 |
+
"grad_norm": 3.570120096206665,
|
48489 |
+
"learning_rate": 8.221720746547899e-05,
|
48490 |
+
"loss": 0.0671,
|
48491 |
+
"step": 6926
|
48492 |
+
},
|
48493 |
+
{
|
48494 |
+
"epoch": 0.2800853154346134,
|
48495 |
+
"grad_norm": 7.948146820068359,
|
48496 |
+
"learning_rate": 8.221233024369906e-05,
|
48497 |
+
"loss": 0.1388,
|
48498 |
+
"step": 6927
|
48499 |
+
},
|
48500 |
+
{
|
48501 |
+
"epoch": 0.28012574928988043,
|
48502 |
+
"grad_norm": 7.341013431549072,
|
48503 |
+
"learning_rate": 8.220745249788718e-05,
|
48504 |
+
"loss": 0.1484,
|
48505 |
+
"step": 6928
|
48506 |
+
},
|
48507 |
+
{
|
48508 |
+
"epoch": 0.2801661831451474,
|
48509 |
+
"grad_norm": 4.7413434982299805,
|
48510 |
+
"learning_rate": 8.220257422812264e-05,
|
48511 |
+
"loss": 0.2428,
|
48512 |
+
"step": 6929
|
48513 |
+
},
|
48514 |
+
{
|
48515 |
+
"epoch": 0.28020661700041444,
|
48516 |
+
"grad_norm": 2.796992063522339,
|
48517 |
+
"learning_rate": 8.21976954344848e-05,
|
48518 |
+
"loss": 0.1232,
|
48519 |
+
"step": 6930
|
48520 |
+
},
|
48521 |
+
{
|
48522 |
+
"epoch": 0.28024705085568147,
|
48523 |
+
"grad_norm": 2.65800142288208,
|
48524 |
+
"learning_rate": 8.219281611705308e-05,
|
48525 |
+
"loss": 0.0929,
|
48526 |
+
"step": 6931
|
48527 |
}
|
48528 |
],
|
48529 |
"logging_steps": 1,
|
|
|
48543 |
"attributes": {}
|
48544 |
}
|
48545 |
},
|
48546 |
+
"total_flos": 4.286882479425454e+17,
|
48547 |
"train_batch_size": 4,
|
48548 |
"trial_name": null,
|
48549 |
"trial_params": null
|