Training in progress, step 27, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +74 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ace6cb9505b630cc42f296da21a26f3b567183b3a9811fb87fbbb9091993720
 size 2264640

 version https://git-lfs.github.com/spec/v1
+oid sha256:5dd5f0c8fe5d8de2d7c86300e4683230c6f6bdfe172fc1b08a6bb0547632b0ee
 size 2264640

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8e094ef751cbd269068b095cde0e43290ede08203abd0c895a15a9461015938
 size 1183674

 version https://git-lfs.github.com/spec/v1
+oid sha256:003e14264bc10d7bf94a2ca481eace96bd4bf67a4383fef49980269dd9687bd6
 size 1183674

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20a90b6a2c70e12c09fb32cf9d04cacc11c2523c58a742e731a65ec3e279e352
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d25eb66897bc9db464c68a2f7e7d3cb410ee7dcd3fd0e1db4ed0c22dbb4df02
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f72b88be9468665c95a0d3c5676292c6feab6bcdb9e687c2b969b2a3bbd6c3c0
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e25540055d73f0c84acced376b2284a3c9b0e959187ff9560a1efc5e7517001
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e36117d22c9a63b8dad3fdec6160f4a75b956bf530cd84159b5adbb0baea0ea0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:33ef6f4b0dc1a0ee466ac9818efdab5291728661390e42389cd9c4df42291c75
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5017421602787456,
   "eval_steps": 9,
-  "global_step": 18,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -157,6 +157,77 @@
       "eval_samples_per_second": 191.38,
       "eval_steps_per_second": 48.24,
       "step": 18
     }
   ],
   "logging_steps": 1,
@@ -176,7 +247,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3078865861738496.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7526132404181185,
   "eval_steps": 9,
+  "global_step": 27,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 191.38,
       "eval_steps_per_second": 48.24,
       "step": 18
+    },
+    {
+      "epoch": 0.5296167247386759,
+      "grad_norm": 0.9348726868629456,
+      "learning_rate": 5e-05,
+      "loss": 4.7054,
+      "step": 19
+    },
+    {
+      "epoch": 0.5574912891986062,
+      "grad_norm": 0.8965240120887756,
+      "learning_rate": 4.5386582026834906e-05,
+      "loss": 4.7152,
+      "step": 20
+    },
+    {
+      "epoch": 0.5853658536585366,
+      "grad_norm": 0.8576871752738953,
+      "learning_rate": 4.0812524109171476e-05,
+      "loss": 4.629,
+      "step": 21
+    },
+    {
+      "epoch": 0.6132404181184669,
+      "grad_norm": 0.8105357885360718,
+      "learning_rate": 3.631685049639586e-05,
+      "loss": 4.6084,
+      "step": 22
+    },
+    {
+      "epoch": 0.6411149825783972,
+      "grad_norm": 0.7387930750846863,
+      "learning_rate": 3.1937916690642356e-05,
+      "loss": 4.5363,
+      "step": 23
+    },
+    {
+      "epoch": 0.6689895470383276,
+      "grad_norm": 0.8787197470664978,
+      "learning_rate": 2.771308221117309e-05,
+      "loss": 4.639,
+      "step": 24
+    },
+    {
+      "epoch": 0.6968641114982579,
+      "grad_norm": 0.8041818141937256,
+      "learning_rate": 2.3678391856132204e-05,
+      "loss": 4.583,
+      "step": 25
+    },
+    {
+      "epoch": 0.7247386759581882,
+      "grad_norm": 0.7042474746704102,
+      "learning_rate": 1.9868268181037185e-05,
+      "loss": 4.5931,
+      "step": 26
+    },
+    {
+      "epoch": 0.7526132404181185,
+      "grad_norm": 0.6868075132369995,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 4.5914,
+      "step": 27
+    },
+    {
+      "epoch": 0.7526132404181185,
+      "eval_loss": 4.399294376373291,
+      "eval_runtime": 1.2619,
+      "eval_samples_per_second": 191.767,
+      "eval_steps_per_second": 48.338,
+      "step": 27
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4602071416307712.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null