Training in progress, step 7000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6320e5e04036474077325413e126d45f20eb9559576e15009eee9ee624ba6f9b
 size 885543556

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4b4950ec4553701316a38db86e6f6295d1f6234c623e7f9544234103a799366
 size 885543556

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b0d1be6a165da75bfc50abc44dc1aa41dc57f416fffef892846f1b40e75eb41
 size 1771208634

 version https://git-lfs.github.com/spec/v1
+oid sha256:272c90f1b8dc6460197ecfa66da6e571c358079cef3a5cff70d6ea3b66dcc445
 size 1771208634

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b907c23c56b7c89000911a48cbbc42aa7d3f45856b2aca6aaf8b8da8a09ed81
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f744bbc38e658468c6bf51d5993f8d0942abc953359665a93436c6238eb03d14
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38dbe8bf8259b5e5933776f078cb332ff62ea3bce20838592d9cf556b486751f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2d78140d37121396abc5bbbfa27946bc347cfb9af41c9c096faa075158cc229
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.09452483094597543,
   "eval_steps": 500,
-  "global_step": 6500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -98,6 +98,13 @@
       "learning_rate": 1.9369834460360166e-05,
       "loss": 5.9525,
       "step": 6500
     }
   ],
   "logging_steps": 500,
@@ -117,7 +124,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1407146164591824.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10179597178797353,
   "eval_steps": 500,
+  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9369834460360166e-05,
       "loss": 5.9525,
       "step": 6500
+    },
+    {
+      "epoch": 0.10179597178797353,
+      "grad_norm": 14.797924041748047,
+      "learning_rate": 1.9321360188080178e-05,
+      "loss": 5.8114,
+      "step": 7000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1513105308502176.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null