Training in progress, epoch 1, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ab5db3993fff6bfef113bb7ce934d9e7921403f60f33d0c213b7953b6e526a2
 size 2283652852

 version https://git-lfs.github.com/spec/v1
+oid sha256:7fe61c30fb1859076bb7e7bb5d429b44a52c3556e9438b60b8a414fd8ac15606
 size 2283652852

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afc440de8bd791363d8b856fa31330ed582eb260f6f70e14d3fe91d4e5ee0bfb
 size 4550170737

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d6378dd29bf82ea5a0b3367c62d87bebd129d2d9229a826842314387426cdfb
 size 4550170737

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0cca5995e1c99590b9eb14846be54161c10a96568f6182c0d5960bfcdfdc7881
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0046d0fdcb58af9aa23af567ef64e74f1d8b2a40853fefe0d870e0884438c05c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab8935b579d5547eb6c7c8c3bf6883b4682c791d2ebf6a0e9a040f1ef1b4b330
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6999eebde348427dc9a087843dcc87da500631a1194e084f76cd498c0a38b33
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9976019184652278,
   "eval_steps": 500,
-  "global_step": 104,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -21,6 +21,13 @@
       "learning_rate": 1.3164556962025317e-05,
       "loss": 6.4196,
       "step": 104
     }
   ],
   "logging_steps": 2,
@@ -35,12 +42,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4731035212480512.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9952038369304557,
   "eval_steps": 500,
+  "global_step": 208,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.3164556962025317e-05,
       "loss": 6.4196,
       "step": 104
+    },
+    {
+      "epoch": 1.9952038369304557,
+      "grad_norm": 5.404126167297363,
+      "learning_rate": 0.0,
+      "loss": 5.713,
+      "step": 208
     }
   ],
   "logging_steps": 2,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 9439352575352832.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null