Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efc121fefcaba1cb5abd106aa13b76e130d03f44790eab2cf86c6b467bc10c4d
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:95528d72beb5752d8ea28f2e99b5e0f35edfdd6c5e259b407c431a88f11fcbdf
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8c13c05d0a055988133b2e6da8de9b82dc73dc72b41d168e563cc1589e1ac0c
 size 320194002

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c9272e2da91258e98075d4f446b38c59d55ec672ca3ea3c8e430dbf11d137ab
 size 320194002

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e10543bbab998d137ae336d61787d15d98f8f909c2ebc3591c5ebba12f6965b2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a26e99dd2a013c3b0921e5dffe94c5852ce6eba5df9895948b674cd0465d8405
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.3408501148223877,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.05332859301395432,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 9.564,
       "eval_steps_per_second": 1.339,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.0863695152637542e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.258071184158325,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.07110479068527242,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.564,
       "eval_steps_per_second": 1.339,
       "step": 150
+    },
+    {
+      "epoch": 0.05368411696738068,
+      "grad_norm": 8.439820289611816,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 8.0936,
+      "step": 151
+    },
+    {
+      "epoch": 0.05403964092080704,
+      "grad_norm": 8.081132888793945,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 8.6621,
+      "step": 152
+    },
+    {
+      "epoch": 0.054395164874233404,
+      "grad_norm": 6.151838302612305,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 7.5835,
+      "step": 153
+    },
+    {
+      "epoch": 0.054750688827659766,
+      "grad_norm": 8.284475326538086,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 10.0106,
+      "step": 154
+    },
+    {
+      "epoch": 0.05510621278108613,
+      "grad_norm": 7.27126407623291,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 9.6403,
+      "step": 155
+    },
+    {
+      "epoch": 0.05546173673451249,
+      "grad_norm": 7.734671592712402,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 10.083,
+      "step": 156
+    },
+    {
+      "epoch": 0.05581726068793885,
+      "grad_norm": 5.8332953453063965,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 9.5724,
+      "step": 157
+    },
+    {
+      "epoch": 0.056172784641365214,
+      "grad_norm": 5.762612342834473,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 9.3623,
+      "step": 158
+    },
+    {
+      "epoch": 0.056528308594791576,
+      "grad_norm": 5.732877254486084,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 9.0708,
+      "step": 159
+    },
+    {
+      "epoch": 0.05688383254821794,
+      "grad_norm": 5.780948162078857,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 10.1664,
+      "step": 160
+    },
+    {
+      "epoch": 0.0572393565016443,
+      "grad_norm": 5.682799816131592,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 9.6681,
+      "step": 161
+    },
+    {
+      "epoch": 0.05759488045507066,
+      "grad_norm": 5.960980415344238,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 9.9709,
+      "step": 162
+    },
+    {
+      "epoch": 0.057950404408497025,
+      "grad_norm": 5.4174957275390625,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 9.5858,
+      "step": 163
+    },
+    {
+      "epoch": 0.05830592836192339,
+      "grad_norm": 5.20405387878418,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 9.719,
+      "step": 164
+    },
+    {
+      "epoch": 0.05866145231534975,
+      "grad_norm": 5.480771064758301,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 10.4264,
+      "step": 165
+    },
+    {
+      "epoch": 0.05901697626877611,
+      "grad_norm": 6.35004997253418,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 8.6918,
+      "step": 166
+    },
+    {
+      "epoch": 0.05937250022220247,
+      "grad_norm": 5.852508068084717,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 10.1135,
+      "step": 167
+    },
+    {
+      "epoch": 0.059728024175628835,
+      "grad_norm": 5.315590858459473,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 10.242,
+      "step": 168
+    },
+    {
+      "epoch": 0.0600835481290552,
+      "grad_norm": 5.59436559677124,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 9.3808,
+      "step": 169
+    },
+    {
+      "epoch": 0.06043907208248156,
+      "grad_norm": 5.4144606590271,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 9.9846,
+      "step": 170
+    },
+    {
+      "epoch": 0.06079459603590792,
+      "grad_norm": 6.142887592315674,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 10.2045,
+      "step": 171
+    },
+    {
+      "epoch": 0.061150119989334284,
+      "grad_norm": 5.897428035736084,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 9.3935,
+      "step": 172
+    },
+    {
+      "epoch": 0.061505643942760646,
+      "grad_norm": 5.910777568817139,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 9.7209,
+      "step": 173
+    },
+    {
+      "epoch": 0.06186116789618701,
+      "grad_norm": 6.222726821899414,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 9.6761,
+      "step": 174
+    },
+    {
+      "epoch": 0.06221669184961337,
+      "grad_norm": 5.125181198120117,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 8.4231,
+      "step": 175
+    },
+    {
+      "epoch": 0.06221669184961337,
+      "eval_loss": 2.2541375160217285,
+      "eval_runtime": 5.2294,
+      "eval_samples_per_second": 9.561,
+      "eval_steps_per_second": 1.339,
+      "step": 175
+    },
+    {
+      "epoch": 0.06257221580303973,
+      "grad_norm": 5.437519550323486,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 9.1396,
+      "step": 176
+    },
+    {
+      "epoch": 0.0629277397564661,
+      "grad_norm": 5.65785551071167,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 9.3145,
+      "step": 177
+    },
+    {
+      "epoch": 0.06328326370989246,
+      "grad_norm": 6.449829578399658,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 10.6776,
+      "step": 178
+    },
+    {
+      "epoch": 0.06363878766331882,
+      "grad_norm": 5.781719207763672,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 9.2837,
+      "step": 179
+    },
+    {
+      "epoch": 0.06399431161674518,
+      "grad_norm": 6.302221298217773,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 9.5917,
+      "step": 180
+    },
+    {
+      "epoch": 0.06434983557017154,
+      "grad_norm": 6.922592639923096,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 10.2706,
+      "step": 181
+    },
+    {
+      "epoch": 0.0647053595235979,
+      "grad_norm": 6.361343860626221,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 10.5921,
+      "step": 182
+    },
+    {
+      "epoch": 0.06506088347702427,
+      "grad_norm": 6.438650131225586,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 9.2032,
+      "step": 183
+    },
+    {
+      "epoch": 0.06541640743045063,
+      "grad_norm": 5.722344875335693,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 10.2673,
+      "step": 184
+    },
+    {
+      "epoch": 0.06577193138387699,
+      "grad_norm": 5.676843643188477,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 9.6069,
+      "step": 185
+    },
+    {
+      "epoch": 0.06612745533730335,
+      "grad_norm": 5.7516303062438965,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 10.1288,
+      "step": 186
+    },
+    {
+      "epoch": 0.06648297929072972,
+      "grad_norm": 5.490580081939697,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 9.7611,
+      "step": 187
+    },
+    {
+      "epoch": 0.06683850324415608,
+      "grad_norm": 6.059674263000488,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 10.7803,
+      "step": 188
+    },
+    {
+      "epoch": 0.06719402719758244,
+      "grad_norm": 6.390851020812988,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 10.2995,
+      "step": 189
+    },
+    {
+      "epoch": 0.0675495511510088,
+      "grad_norm": 6.440296649932861,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 10.1124,
+      "step": 190
+    },
+    {
+      "epoch": 0.06790507510443516,
+      "grad_norm": 7.456547737121582,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 10.2623,
+      "step": 191
+    },
+    {
+      "epoch": 0.06826059905786153,
+      "grad_norm": 6.4957709312438965,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 9.9391,
+      "step": 192
+    },
+    {
+      "epoch": 0.06861612301128789,
+      "grad_norm": 6.481423854827881,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 10.0795,
+      "step": 193
+    },
+    {
+      "epoch": 0.06897164696471425,
+      "grad_norm": 9.493501663208008,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 9.7269,
+      "step": 194
+    },
+    {
+      "epoch": 0.06932717091814061,
+      "grad_norm": 6.7120561599731445,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 9.9236,
+      "step": 195
+    },
+    {
+      "epoch": 0.06968269487156697,
+      "grad_norm": 7.084906578063965,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 10.1646,
+      "step": 196
+    },
+    {
+      "epoch": 0.07003821882499334,
+      "grad_norm": 7.463186264038086,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 10.8669,
+      "step": 197
+    },
+    {
+      "epoch": 0.0703937427784197,
+      "grad_norm": 7.703351974487305,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 10.6108,
+      "step": 198
+    },
+    {
+      "epoch": 0.07074926673184606,
+      "grad_norm": 8.033586502075195,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 10.088,
+      "step": 199
+    },
+    {
+      "epoch": 0.07110479068527242,
+      "grad_norm": 9.093062400817871,
+      "learning_rate": 0.0,
+      "loss": 7.8546,
+      "step": 200
+    },
+    {
+      "epoch": 0.07110479068527242,
+      "eval_loss": 2.258071184158325,
+      "eval_runtime": 5.2303,
+      "eval_samples_per_second": 9.56,
+      "eval_steps_per_second": 1.338,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.7806688547858022e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null