Training in progress, step 150, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e3dfed9da18e2a0aa5724586b2dca86c36772ef0e1b9e4cacf4f858c70fab168
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2170ae8d08ba85a70900bc8d34e067926432cd8c6ae9a7a628dbcaafba7b35e
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f47773a51c10f9eeab921f21cb6cd939742832f0dccc571871750f5f76b2f68
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c011e239765023dd2a6794a56eba6e53de6ace5c477d02cbf603c6151929685
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43feb5d67a9fc661e502a4768b53cb9f5ceea2499482a7743fa63ddedb8e8aca
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f6e23a05cbfc0213723b7cf803a83a3bebb0eaebe9bfe5d290a6e2c7a19e84d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a71f60f9bd2ac4332e4682c06bcf0e8b6f23e0f17b0fa94d84f1fe3045e8b4a8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0edab479cf5df2fd0e0eb08833b9040a0342b7b3b1ce5f746c88e4c78156c68
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.6,
   "eval_steps": 13,
-  "global_step": 143,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -432,6 +432,27 @@
       "eval_samples_per_second": 9.958,
       "eval_steps_per_second": 1.285,
       "step": 143
     }
   ],
   "logging_steps": 3,
@@ -446,12 +467,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.234818805423145e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.7272727272727275,
   "eval_steps": 13,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.958,
       "eval_steps_per_second": 1.285,
       "step": 143
+    },
+    {
+      "epoch": 2.618181818181818,
+      "grad_norm": 0.3637115955352783,
+      "learning_rate": 4.52511911603265e-07,
+      "loss": 0.036,
+      "step": 144
+    },
+    {
+      "epoch": 2.672727272727273,
+      "grad_norm": 0.41367557644844055,
+      "learning_rate": 1.132562476771959e-07,
+      "loss": 0.0382,
+      "step": 147
+    },
+    {
+      "epoch": 2.7272727272727275,
+      "grad_norm": 0.5478535294532776,
+      "learning_rate": 0.0,
+      "loss": 0.0333,
+      "step": 150
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.389441447244595e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null