Training in progress, step 75, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +187 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6ba54415592535b5deb4d426438a9c2d3fda44dd59cc053b65b3d50072d6931
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:d7022b6e69a94a91ad8321c9067d9a5c6755a8ac6835245f81dfec9d8592b014
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a7a35b2bab9539cc55cf97a00eb49ce0480bebbd5aae9cb3bdfdaee31e4b1fa
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:24ce223612504b96db57f6768c022ce69aa94f86dad7eb538d68af3853c038ce
 size 202110330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66b738e61e29b3536eb6d5c0f3f4d525d915958f2cf65da2df3e0128bb584b89
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:74af9c0b499d140dc77b9c7546adca9c0a5bab9901675167855f2f2310db46b5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46fa8207e86dee7d50b0ab12f1dd18c4426e8c65d06f97f8b2bd004a747e9cfa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:212837ccb433e5430b061dc107b19dc09e932e6cfb62a751187d0903b7b0d94e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.14657383657017223,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,189 @@
       "eval_samples_per_second": 13.773,
       "eval_steps_per_second": 6.887,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -395,12 +578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0573970474532864e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.21986075485525833,
   "eval_steps": 25,
+  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.773,
       "eval_steps_per_second": 6.887,
       "step": 50
+    },
+    {
+      "epoch": 0.14950531330157568,
+      "grad_norm": 1.3906869888305664,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.2653,
+      "step": 51
+    },
+    {
+      "epoch": 0.1524367900329791,
+      "grad_norm": 0.8645505309104919,
+      "learning_rate": 2.3135019582658802e-05,
+      "loss": 0.0972,
+      "step": 52
+    },
+    {
+      "epoch": 0.15536826676438256,
+      "grad_norm": 0.3462808430194855,
+      "learning_rate": 2.132117818244771e-05,
+      "loss": 0.0178,
+      "step": 53
+    },
+    {
+      "epoch": 0.158299743495786,
+      "grad_norm": 0.8373314142227173,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.1511,
+      "step": 54
+    },
+    {
+      "epoch": 0.16123122022718944,
+      "grad_norm": 0.5848434567451477,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 0.0587,
+      "step": 55
+    },
+    {
+      "epoch": 0.1641626969585929,
+      "grad_norm": 0.7180687785148621,
+      "learning_rate": 1.622048961921699e-05,
+      "loss": 0.1289,
+      "step": 56
+    },
+    {
+      "epoch": 0.16709417368999632,
+      "grad_norm": 0.4832279086112976,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.0529,
+      "step": 57
+    },
+    {
+      "epoch": 0.17002565042139978,
+      "grad_norm": 0.5206469893455505,
+      "learning_rate": 1.3136133159493802e-05,
+      "loss": 0.054,
+      "step": 58
+    },
+    {
+      "epoch": 0.17295712715280323,
+      "grad_norm": 0.3522478938102722,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.0454,
+      "step": 59
+    },
+    {
+      "epoch": 0.17588860388420666,
+      "grad_norm": 0.6369943022727966,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.0807,
+      "step": 60
+    },
+    {
+      "epoch": 0.1788200806156101,
+      "grad_norm": 0.4056720435619354,
+      "learning_rate": 9.042397785550405e-06,
+      "loss": 0.0568,
+      "step": 61
+    },
+    {
+      "epoch": 0.18175155734701356,
+      "grad_norm": 0.4317684471607208,
+      "learning_rate": 7.830427709355725e-06,
+      "loss": 0.0559,
+      "step": 62
+    },
+    {
+      "epoch": 0.184683034078417,
+      "grad_norm": 0.5188791751861572,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.0894,
+      "step": 63
+    },
+    {
+      "epoch": 0.18761451080982045,
+      "grad_norm": 0.4226022958755493,
+      "learning_rate": 5.649458341088915e-06,
+      "loss": 0.0641,
+      "step": 64
+    },
+    {
+      "epoch": 0.1905459875412239,
+      "grad_norm": 0.6744760870933533,
+      "learning_rate": 4.684610648167503e-06,
+      "loss": 0.0995,
+      "step": 65
+    },
+    {
+      "epoch": 0.19347746427262733,
+      "grad_norm": 0.36501771211624146,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.0554,
+      "step": 66
+    },
+    {
+      "epoch": 0.19640894100403078,
+      "grad_norm": 0.4740172326564789,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.0403,
+      "step": 67
+    },
+    {
+      "epoch": 0.19934041773543423,
+      "grad_norm": 0.37339267134666443,
+      "learning_rate": 2.314152462588659e-06,
+      "loss": 0.0429,
+      "step": 68
+    },
+    {
+      "epoch": 0.20227189446683766,
+      "grad_norm": 0.46301230788230896,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.0245,
+      "step": 69
+    },
+    {
+      "epoch": 0.2052033711982411,
+      "grad_norm": 0.37362566590309143,
+      "learning_rate": 1.1851996440033319e-06,
+      "loss": 0.0502,
+      "step": 70
+    },
+    {
+      "epoch": 0.20813484792964457,
+      "grad_norm": 0.3411478102207184,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.0327,
+      "step": 71
+    },
+    {
+      "epoch": 0.211066324661048,
+      "grad_norm": 0.6511849761009216,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.0708,
+      "step": 72
+    },
+    {
+      "epoch": 0.21399780139245145,
+      "grad_norm": 0.4536079168319702,
+      "learning_rate": 1.9026509541272275e-07,
+      "loss": 0.05,
+      "step": 73
+    },
+    {
+      "epoch": 0.2169292781238549,
+      "grad_norm": 0.5318543910980225,
+      "learning_rate": 4.7588920907110094e-08,
+      "loss": 0.0867,
+      "step": 74
+    },
+    {
+      "epoch": 0.21986075485525833,
+      "grad_norm": 0.3312048017978668,
+      "learning_rate": 0.0,
+      "loss": 0.0279,
+      "step": 75
+    },
+    {
+      "epoch": 0.21986075485525833,
+      "eval_loss": 0.06166088581085205,
+      "eval_runtime": 20.9069,
+      "eval_samples_per_second": 13.775,
+      "eval_steps_per_second": 6.888,
+      "step": 75
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.5860955711799296e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null