Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +372 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85b50a43769834d9e2d486bef3c295a9079b5df19e334c2537560550d4ce7c33
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:f07faf6127d39169a72ebc1b375dd95836420665148e5c732581a23669ec2234
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e791379a77b1260754fdef29d089b97673d45cbd95d881cf8436c25721bd3adc
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:d93a52fad5d33875c072a85f36ce088d497e195ffd53e2e3592720d2337dce0b
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4de947bb1161caff73ad966ee0a0ad3d4dba33eaccafcb4fb95194a8571de60f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd8f18ccd06ecf2fa901c5d4b147731be385d926b5a1010e53cdee4a7e6e9e68
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70710c409284f74d525f8db5cfaccc22a8afd29416f19c595da9242ec92d936
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe157715eb8e05b3bab2a7f2fafac33705dc4a1a9dd7f6d860c3a7f9597d78bb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.21060962975025177,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.3245033112582782,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,372 @@
       "eval_samples_per_second": 5.807,
       "eval_steps_per_second": 0.813,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1502,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.3180618957520896e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.1996617466211319,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 1.7660044150110377,
   "eval_steps": 25,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.807,
       "eval_steps_per_second": 0.813,
       "step": 150
+    },
+    {
+      "epoch": 1.3333333333333333,
+      "grad_norm": 0.22895404696464539,
+      "learning_rate": 4.659698863221513e-05,
+      "loss": 0.1954,
+      "step": 151
+    },
+    {
+      "epoch": 1.3421633554083885,
+      "grad_norm": 0.26738283038139343,
+      "learning_rate": 4.481448235912671e-05,
+      "loss": 0.1996,
+      "step": 152
+    },
+    {
+      "epoch": 1.3509933774834437,
+      "grad_norm": 0.23517753183841705,
+      "learning_rate": 4.306073275629044e-05,
+      "loss": 0.1866,
+      "step": 153
+    },
+    {
+      "epoch": 1.359823399558499,
+      "grad_norm": 0.24005506932735443,
+      "learning_rate": 4.133621928133665e-05,
+      "loss": 0.1938,
+      "step": 154
+    },
+    {
+      "epoch": 1.368653421633554,
+      "grad_norm": 0.2558877468109131,
+      "learning_rate": 3.964141339903026e-05,
+      "loss": 0.1885,
+      "step": 155
+    },
+    {
+      "epoch": 1.3774834437086092,
+      "grad_norm": 0.22536706924438477,
+      "learning_rate": 3.797677845237696e-05,
+      "loss": 0.1748,
+      "step": 156
+    },
+    {
+      "epoch": 1.3863134657836644,
+      "grad_norm": 0.24991802871227264,
+      "learning_rate": 3.634276953594982e-05,
+      "loss": 0.187,
+      "step": 157
+    },
+    {
+      "epoch": 1.3951434878587197,
+      "grad_norm": 0.22741112112998962,
+      "learning_rate": 3.473983337147118e-05,
+      "loss": 0.1728,
+      "step": 158
+    },
+    {
+      "epoch": 1.403973509933775,
+      "grad_norm": 0.247371643781662,
+      "learning_rate": 3.316840818568315e-05,
+      "loss": 0.1947,
+      "step": 159
+    },
+    {
+      "epoch": 1.4128035320088301,
+      "grad_norm": 0.2145814299583435,
+      "learning_rate": 3.162892359054098e-05,
+      "loss": 0.1477,
+      "step": 160
+    },
+    {
+      "epoch": 1.4216335540838851,
+      "grad_norm": 0.20137929916381836,
+      "learning_rate": 3.0121800465761293e-05,
+      "loss": 0.147,
+      "step": 161
+    },
+    {
+      "epoch": 1.4304635761589404,
+      "grad_norm": 0.22576574981212616,
+      "learning_rate": 2.8647450843757897e-05,
+      "loss": 0.1609,
+      "step": 162
+    },
+    {
+      "epoch": 1.4392935982339956,
+      "grad_norm": 0.20561394095420837,
+      "learning_rate": 2.7206277796996144e-05,
+      "loss": 0.1319,
+      "step": 163
+    },
+    {
+      "epoch": 1.4481236203090508,
+      "grad_norm": 0.22655843198299408,
+      "learning_rate": 2.5798675327796993e-05,
+      "loss": 0.1745,
+      "step": 164
+    },
+    {
+      "epoch": 1.4569536423841059,
+      "grad_norm": 0.24106654524803162,
+      "learning_rate": 2.4425028260620715e-05,
+      "loss": 0.1623,
+      "step": 165
+    },
+    {
+      "epoch": 1.465783664459161,
+      "grad_norm": 0.25789502263069153,
+      "learning_rate": 2.3085712136859668e-05,
+      "loss": 0.1694,
+      "step": 166
+    },
+    {
+      "epoch": 1.4746136865342163,
+      "grad_norm": 0.26895081996917725,
+      "learning_rate": 2.178109311216913e-05,
+      "loss": 0.1587,
+      "step": 167
+    },
+    {
+      "epoch": 1.4834437086092715,
+      "grad_norm": 0.30352866649627686,
+      "learning_rate": 2.0511527856363912e-05,
+      "loss": 0.1995,
+      "step": 168
+    },
+    {
+      "epoch": 1.4922737306843268,
+      "grad_norm": 0.5954450964927673,
+      "learning_rate": 1.927736345590839e-05,
+      "loss": 0.3586,
+      "step": 169
+    },
+    {
+      "epoch": 1.501103752759382,
+      "grad_norm": 0.21726396679878235,
+      "learning_rate": 1.8078937319026654e-05,
+      "loss": 0.2243,
+      "step": 170
+    },
+    {
+      "epoch": 1.5099337748344372,
+      "grad_norm": 0.22568368911743164,
+      "learning_rate": 1.6916577083458228e-05,
+      "loss": 0.2319,
+      "step": 171
+    },
+    {
+      "epoch": 1.5187637969094923,
+      "grad_norm": 0.23746433854103088,
+      "learning_rate": 1.579060052688548e-05,
+      "loss": 0.2414,
+      "step": 172
+    },
+    {
+      "epoch": 1.5275938189845475,
+      "grad_norm": 0.21537818014621735,
+      "learning_rate": 1.4701315480056164e-05,
+      "loss": 0.2029,
+      "step": 173
+    },
+    {
+      "epoch": 1.5364238410596025,
+      "grad_norm": 0.22447596490383148,
+      "learning_rate": 1.3649019742625623e-05,
+      "loss": 0.1825,
+      "step": 174
+    },
+    {
+      "epoch": 1.5452538631346577,
+      "grad_norm": 0.2091474086046219,
+      "learning_rate": 1.2634001001741373e-05,
+      "loss": 0.1776,
+      "step": 175
+    },
+    {
+      "epoch": 1.5452538631346577,
+      "eval_loss": 0.19788633286952972,
+      "eval_runtime": 7.9222,
+      "eval_samples_per_second": 6.311,
+      "eval_steps_per_second": 0.884,
+      "step": 175
+    },
+    {
+      "epoch": 1.554083885209713,
+      "grad_norm": 0.22721268236637115,
+      "learning_rate": 1.1656536753392287e-05,
+      "loss": 0.2137,
+      "step": 176
+    },
+    {
+      "epoch": 1.5629139072847682,
+      "grad_norm": 0.20839567482471466,
+      "learning_rate": 1.0716894226543953e-05,
+      "loss": 0.1749,
+      "step": 177
+    },
+    {
+      "epoch": 1.5717439293598234,
+      "grad_norm": 0.21761561930179596,
+      "learning_rate": 9.815330310080887e-06,
+      "loss": 0.1772,
+      "step": 178
+    },
+    {
+      "epoch": 1.5805739514348787,
+      "grad_norm": 0.22803503274917603,
+      "learning_rate": 8.952091482575824e-06,
+      "loss": 0.1855,
+      "step": 179
+    },
+    {
+      "epoch": 1.589403973509934,
+      "grad_norm": 0.21269898116588593,
+      "learning_rate": 8.127413744904804e-06,
+      "loss": 0.1775,
+      "step": 180
+    },
+    {
+      "epoch": 1.598233995584989,
+      "grad_norm": 0.22217035293579102,
+      "learning_rate": 7.34152255572697e-06,
+      "loss": 0.2023,
+      "step": 181
+    },
+    {
+      "epoch": 1.6070640176600441,
+      "grad_norm": 0.21203316748142242,
+      "learning_rate": 6.594632769846353e-06,
+      "loss": 0.1833,
+      "step": 182
+    },
+    {
+      "epoch": 1.6158940397350994,
+      "grad_norm": 0.20570416748523712,
+      "learning_rate": 5.886948579472778e-06,
+      "loss": 0.1535,
+      "step": 183
+    },
+    {
+      "epoch": 1.6247240618101544,
+      "grad_norm": 0.22212745249271393,
+      "learning_rate": 5.218663458397715e-06,
+      "loss": 0.1816,
+      "step": 184
+    },
+    {
+      "epoch": 1.6335540838852096,
+      "grad_norm": 0.22477659583091736,
+      "learning_rate": 4.589960109100444e-06,
+      "loss": 0.1934,
+      "step": 185
+    },
+    {
+      "epoch": 1.6423841059602649,
+      "grad_norm": 0.21524512767791748,
+      "learning_rate": 4.001010412799138e-06,
+      "loss": 0.1755,
+      "step": 186
+    },
+    {
+      "epoch": 1.65121412803532,
+      "grad_norm": 0.2027374505996704,
+      "learning_rate": 3.451975382460109e-06,
+      "loss": 0.1684,
+      "step": 187
+    },
+    {
+      "epoch": 1.6600441501103753,
+      "grad_norm": 0.1885063648223877,
+      "learning_rate": 2.9430051187785962e-06,
+      "loss": 0.1393,
+      "step": 188
+    },
+    {
+      "epoch": 1.6688741721854305,
+      "grad_norm": 0.20242398977279663,
+      "learning_rate": 2.4742387691426445e-06,
+      "loss": 0.1542,
+      "step": 189
+    },
+    {
+      "epoch": 1.6777041942604858,
+      "grad_norm": 0.20245841145515442,
+      "learning_rate": 2.0458044895916513e-06,
+      "loss": 0.1246,
+      "step": 190
+    },
+    {
+      "epoch": 1.6865342163355408,
+      "grad_norm": 0.19838440418243408,
+      "learning_rate": 1.6578194097797258e-06,
+      "loss": 0.15,
+      "step": 191
+    },
+    {
+      "epoch": 1.695364238410596,
+      "grad_norm": 0.19168749451637268,
+      "learning_rate": 1.3103896009537207e-06,
+      "loss": 0.1339,
+      "step": 192
+    },
+    {
+      "epoch": 1.7041942604856513,
+      "grad_norm": 0.21052022278308868,
+      "learning_rate": 1.0036100469542786e-06,
+      "loss": 0.1397,
+      "step": 193
+    },
+    {
+      "epoch": 1.7130242825607063,
+      "grad_norm": 0.24240228533744812,
+      "learning_rate": 7.375646182482875e-07,
+      "loss": 0.1545,
+      "step": 194
+    },
+    {
+      "epoch": 1.7218543046357615,
+      "grad_norm": 0.2707175314426422,
+      "learning_rate": 5.123260489995229e-07,
+      "loss": 0.1654,
+      "step": 195
+    },
+    {
+      "epoch": 1.7306843267108167,
+      "grad_norm": 0.3442803621292114,
+      "learning_rate": 3.2795591718381975e-07,
+      "loss": 0.2205,
+      "step": 196
+    },
+    {
+      "epoch": 1.739514348785872,
+      "grad_norm": 0.7022460103034973,
+      "learning_rate": 1.8450462775428942e-07,
+      "loss": 0.3136,
+      "step": 197
+    },
+    {
+      "epoch": 1.7483443708609272,
+      "grad_norm": 0.190536230802536,
+      "learning_rate": 8.201139886109264e-08,
+      "loss": 0.2159,
+      "step": 198
+    },
+    {
+      "epoch": 1.7571743929359824,
+      "grad_norm": 0.21792356669902802,
+      "learning_rate": 2.0504251129649374e-08,
+      "loss": 0.2479,
+      "step": 199
+    },
+    {
+      "epoch": 1.7660044150110377,
+      "grad_norm": 0.22573994100093842,
+      "learning_rate": 0.0,
+      "loss": 0.2372,
+      "step": 200
+    },
+    {
+      "epoch": 1.7660044150110377,
+      "eval_loss": 0.1996617466211319,
+      "eval_runtime": 8.6103,
+      "eval_samples_per_second": 5.807,
+      "eval_steps_per_second": 0.813,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.3349111537821286e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null