Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ccc85b6fbb7c856a3850b77b05669feec1027e37fd094c1945d00a76c759718
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5645df2ca95bbe8cc1ec63995ce0e55780ed228f662981c74bb5602f846ae8c
 size 159967880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23443dd6f9908d028c0bfad8dd61e2e81a180370b0992a3feb722e9b00ec036b
 size 320194002

 version https://git-lfs.github.com/spec/v1
+oid sha256:15e3cbd9a663460518f65bdba97f0baa8418b89ec974a14365d5821811533e21
 size 320194002

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0433564a4df99854aec9a017c1fcad300ed1f14852fd31500bd301a930c80ccd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7c5fef28c0ed8ef1bf423ac5868c69c38b933eb243dcfd9a3c5f873c360c532
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8916868567466736,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.1583011583011582,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 9.319,
       "eval_steps_per_second": 1.305,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.1037477394173133e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.8602708578109741,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.5444015444015444,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.319,
       "eval_steps_per_second": 1.305,
       "step": 150
+    },
+    {
+      "epoch": 1.166023166023166,
+      "grad_norm": 1.7999972105026245,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 4.194,
+      "step": 151
+    },
+    {
+      "epoch": 1.1737451737451738,
+      "grad_norm": 2.0590758323669434,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 4.3798,
+      "step": 152
+    },
+    {
+      "epoch": 1.1814671814671815,
+      "grad_norm": 1.7075778245925903,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 3.6218,
+      "step": 153
+    },
+    {
+      "epoch": 1.1891891891891893,
+      "grad_norm": 1.822914481163025,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 4.1097,
+      "step": 154
+    },
+    {
+      "epoch": 1.196911196911197,
+      "grad_norm": 1.7491306066513062,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 3.969,
+      "step": 155
+    },
+    {
+      "epoch": 1.2046332046332047,
+      "grad_norm": 1.8288490772247314,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 4.5371,
+      "step": 156
+    },
+    {
+      "epoch": 1.2123552123552124,
+      "grad_norm": 1.784482717514038,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 3.7527,
+      "step": 157
+    },
+    {
+      "epoch": 1.22007722007722,
+      "grad_norm": 1.966124176979065,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 4.3173,
+      "step": 158
+    },
+    {
+      "epoch": 1.2277992277992278,
+      "grad_norm": 1.9975626468658447,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 4.6156,
+      "step": 159
+    },
+    {
+      "epoch": 1.2355212355212355,
+      "grad_norm": 2.316918134689331,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 4.0331,
+      "step": 160
+    },
+    {
+      "epoch": 1.2432432432432432,
+      "grad_norm": 2.4572503566741943,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 3.5958,
+      "step": 161
+    },
+    {
+      "epoch": 1.250965250965251,
+      "grad_norm": 2.1423158645629883,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 3.1944,
+      "step": 162
+    },
+    {
+      "epoch": 1.2586872586872586,
+      "grad_norm": 1.1138101816177368,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 3.0913,
+      "step": 163
+    },
+    {
+      "epoch": 1.2664092664092665,
+      "grad_norm": 0.9798049926757812,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 3.2327,
+      "step": 164
+    },
+    {
+      "epoch": 1.2741312741312742,
+      "grad_norm": 0.974193274974823,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 3.1556,
+      "step": 165
+    },
+    {
+      "epoch": 1.281853281853282,
+      "grad_norm": 1.0049970149993896,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 2.9129,
+      "step": 166
+    },
+    {
+      "epoch": 1.2895752895752897,
+      "grad_norm": 0.9587703347206116,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 3.1648,
+      "step": 167
+    },
+    {
+      "epoch": 1.2972972972972974,
+      "grad_norm": 0.9530733227729797,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 2.9332,
+      "step": 168
+    },
+    {
+      "epoch": 1.305019305019305,
+      "grad_norm": 0.9956369400024414,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 3.1722,
+      "step": 169
+    },
+    {
+      "epoch": 1.3127413127413128,
+      "grad_norm": 1.0111753940582275,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 3.1122,
+      "step": 170
+    },
+    {
+      "epoch": 1.3204633204633205,
+      "grad_norm": 1.0399214029312134,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 3.141,
+      "step": 171
+    },
+    {
+      "epoch": 1.3281853281853282,
+      "grad_norm": 1.0311812162399292,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 3.0995,
+      "step": 172
+    },
+    {
+      "epoch": 1.335907335907336,
+      "grad_norm": 1.0047649145126343,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 2.8465,
+      "step": 173
+    },
+    {
+      "epoch": 1.3436293436293436,
+      "grad_norm": 1.0682458877563477,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 2.9523,
+      "step": 174
+    },
+    {
+      "epoch": 1.3513513513513513,
+      "grad_norm": 1.0513615608215332,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 3.0281,
+      "step": 175
+    },
+    {
+      "epoch": 1.3513513513513513,
+      "eval_loss": 0.8898646831512451,
+      "eval_runtime": 5.3569,
+      "eval_samples_per_second": 9.334,
+      "eval_steps_per_second": 1.307,
+      "step": 175
+    },
+    {
+      "epoch": 1.359073359073359,
+      "grad_norm": 1.0834391117095947,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 2.8727,
+      "step": 176
+    },
+    {
+      "epoch": 1.3667953667953667,
+      "grad_norm": 1.0857892036437988,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 2.9288,
+      "step": 177
+    },
+    {
+      "epoch": 1.3745173745173744,
+      "grad_norm": 1.1154727935791016,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 3.056,
+      "step": 178
+    },
+    {
+      "epoch": 1.3822393822393821,
+      "grad_norm": 1.1769471168518066,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 3.2476,
+      "step": 179
+    },
+    {
+      "epoch": 1.3899613899613898,
+      "grad_norm": 1.2554057836532593,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 3.6617,
+      "step": 180
+    },
+    {
+      "epoch": 1.3976833976833976,
+      "grad_norm": 1.3026721477508545,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 3.8659,
+      "step": 181
+    },
+    {
+      "epoch": 1.4054054054054055,
+      "grad_norm": 1.4275637865066528,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 3.8521,
+      "step": 182
+    },
+    {
+      "epoch": 1.4131274131274132,
+      "grad_norm": 1.5288902521133423,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 4.0976,
+      "step": 183
+    },
+    {
+      "epoch": 1.420849420849421,
+      "grad_norm": 1.5545921325683594,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 3.9859,
+      "step": 184
+    },
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 1.745778203010559,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 4.0148,
+      "step": 185
+    },
+    {
+      "epoch": 1.4362934362934363,
+      "grad_norm": 1.7549493312835693,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 4.2167,
+      "step": 186
+    },
+    {
+      "epoch": 1.444015444015444,
+      "grad_norm": 1.7042094469070435,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 3.9454,
+      "step": 187
+    },
+    {
+      "epoch": 1.4517374517374517,
+      "grad_norm": 1.8767509460449219,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 3.8077,
+      "step": 188
+    },
+    {
+      "epoch": 1.4594594594594594,
+      "grad_norm": 1.7444730997085571,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 3.3349,
+      "step": 189
+    },
+    {
+      "epoch": 1.4671814671814671,
+      "grad_norm": 1.981868863105774,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 4.0558,
+      "step": 190
+    },
+    {
+      "epoch": 1.4749034749034748,
+      "grad_norm": 2.037660837173462,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 3.9423,
+      "step": 191
+    },
+    {
+      "epoch": 1.4826254826254825,
+      "grad_norm": 2.352879524230957,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 4.1185,
+      "step": 192
+    },
+    {
+      "epoch": 1.4903474903474905,
+      "grad_norm": 2.6159369945526123,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 3.4655,
+      "step": 193
+    },
+    {
+      "epoch": 1.4980694980694982,
+      "grad_norm": 2.1394124031066895,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 2.7503,
+      "step": 194
+    },
+    {
+      "epoch": 1.505791505791506,
+      "grad_norm": 0.937629759311676,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 2.9738,
+      "step": 195
+    },
+    {
+      "epoch": 1.5135135135135136,
+      "grad_norm": 0.8871747255325317,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 3.104,
+      "step": 196
+    },
+    {
+      "epoch": 1.5212355212355213,
+      "grad_norm": 0.9106766581535339,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 3.083,
+      "step": 197
+    },
+    {
+      "epoch": 1.528957528957529,
+      "grad_norm": 0.8707587718963623,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 2.8924,
+      "step": 198
+    },
+    {
+      "epoch": 1.5366795366795367,
+      "grad_norm": 0.8944032788276672,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 2.991,
+      "step": 199
+    },
+    {
+      "epoch": 1.5444015444015444,
+      "grad_norm": 0.9251424670219421,
+      "learning_rate": 0.0,
+      "loss": 2.9376,
+      "step": 200
+    },
+    {
+      "epoch": 1.5444015444015444,
+      "eval_loss": 0.8602708578109741,
+      "eval_runtime": 5.3475,
+      "eval_samples_per_second": 9.35,
+      "eval_steps_per_second": 1.309,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.804996985889751e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null