Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8939a97c8cf69dc2293d9a3d98e2bc84d4314da044570fc97fe63f160614912f
 size 50624

 version https://git-lfs.github.com/spec/v1
+oid sha256:aac0b5f64801ac1fb37f005443129294ef4f50bb96eed9d73f5e838c647f2c67
 size 50624

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da651a30416b2e6cc03c5df9bd45af5ec982e95bc9ea699196a8a0e19925245f
 size 118090

 version https://git-lfs.github.com/spec/v1
+oid sha256:9578127d0c83a7681d403efee980859388cda9446bd5b9de0721e08bd47a6ea5
 size 118090

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b8d114b7aeb318d673e32c7968e1dc83c5095db1b19036ccb0e75a201d604f1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c65b889e0f8b6457fab65599625eb2180c2a11ed70e5358d050a058c98ebea40
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1900bd8021f13c38b942ed30aea6e2cea1b47664e4ce28d0276b142334732307
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2c4a11c3ec7ace2e963dc6e2b0b5b6372cc0250cefb36d5f7289475908638cb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.366990089416504,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.8791208791208791,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 492.002,
       "eval_steps_per_second": 123.0,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5230244659200.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.366907119750977,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.7648351648351648,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 492.002,
       "eval_steps_per_second": 123.0,
       "step": 50
+    },
+    {
+      "epoch": 0.8967032967032967,
+      "grad_norm": 0.040907103568315506,
+      "learning_rate": 2.847932752400164e-06,
+      "loss": 10.3663,
+      "step": 51
+    },
+    {
+      "epoch": 0.9142857142857143,
+      "grad_norm": 0.039741672575473785,
+      "learning_rate": 2.761321158169134e-06,
+      "loss": 10.3672,
+      "step": 52
+    },
+    {
+      "epoch": 0.9318681318681319,
+      "grad_norm": 0.04772138595581055,
+      "learning_rate": 2.6743911843603134e-06,
+      "loss": 10.372,
+      "step": 53
+    },
+    {
+      "epoch": 0.9494505494505494,
+      "grad_norm": 0.05010518804192543,
+      "learning_rate": 2.587248741756253e-06,
+      "loss": 10.3745,
+      "step": 54
+    },
+    {
+      "epoch": 0.967032967032967,
+      "grad_norm": 0.05096741393208504,
+      "learning_rate": 2.5e-06,
+      "loss": 10.3641,
+      "step": 55
+    },
+    {
+      "epoch": 0.9846153846153847,
+      "grad_norm": 0.057732198387384415,
+      "learning_rate": 2.4127512582437486e-06,
+      "loss": 10.3696,
+      "step": 56
+    },
+    {
+      "epoch": 1.0087912087912088,
+      "grad_norm": 0.04998980835080147,
+      "learning_rate": 2.325608815639687e-06,
+      "loss": 16.071,
+      "step": 57
+    },
+    {
+      "epoch": 1.0263736263736263,
+      "grad_norm": 0.036868538707494736,
+      "learning_rate": 2.238678841830867e-06,
+      "loss": 10.0527,
+      "step": 58
+    },
+    {
+      "epoch": 1.043956043956044,
+      "grad_norm": 0.04606516286730766,
+      "learning_rate": 2.1520672475998374e-06,
+      "loss": 10.5546,
+      "step": 59
+    },
+    {
+      "epoch": 1.0615384615384615,
+      "grad_norm": 0.038774143904447556,
+      "learning_rate": 2.0658795558326745e-06,
+      "loss": 9.9698,
+      "step": 60
+    },
+    {
+      "epoch": 1.079120879120879,
+      "grad_norm": 0.03512636572122574,
+      "learning_rate": 1.9802207729556023e-06,
+      "loss": 10.5632,
+      "step": 61
+    },
+    {
+      "epoch": 1.0967032967032968,
+      "grad_norm": 0.04192524775862694,
+      "learning_rate": 1.895195261000831e-06,
+      "loss": 10.3435,
+      "step": 62
+    },
+    {
+      "epoch": 1.1142857142857143,
+      "grad_norm": 0.04670780524611473,
+      "learning_rate": 1.8109066104575023e-06,
+      "loss": 10.3459,
+      "step": 63
+    },
+    {
+      "epoch": 1.1318681318681318,
+      "grad_norm": 0.04146342724561691,
+      "learning_rate": 1.7274575140626318e-06,
+      "loss": 10.5141,
+      "step": 64
+    },
+    {
+      "epoch": 1.1494505494505494,
+      "grad_norm": 0.04124102741479874,
+      "learning_rate": 1.6449496416858285e-06,
+      "loss": 10.2299,
+      "step": 65
+    },
+    {
+      "epoch": 1.167032967032967,
+      "grad_norm": 0.04173829033970833,
+      "learning_rate": 1.56348351646022e-06,
+      "loss": 10.39,
+      "step": 66
+    },
+    {
+      "epoch": 1.1846153846153846,
+      "grad_norm": 0.04246349632740021,
+      "learning_rate": 1.4831583923105e-06,
+      "loss": 10.2809,
+      "step": 67
+    },
+    {
+      "epoch": 1.2021978021978021,
+      "grad_norm": 0.040836114436388016,
+      "learning_rate": 1.4040721330273063e-06,
+      "loss": 10.43,
+      "step": 68
+    },
+    {
+      "epoch": 1.2197802197802199,
+      "grad_norm": 0.05367843434214592,
+      "learning_rate": 1.3263210930352737e-06,
+      "loss": 10.3855,
+      "step": 69
+    },
+    {
+      "epoch": 1.2373626373626374,
+      "grad_norm": 0.05292685702443123,
+      "learning_rate": 1.2500000000000007e-06,
+      "loss": 10.9177,
+      "step": 70
+    },
+    {
+      "epoch": 1.254945054945055,
+      "grad_norm": 0.033866383135318756,
+      "learning_rate": 1.1752018394169882e-06,
+      "loss": 10.035,
+      "step": 71
+    },
+    {
+      "epoch": 1.2725274725274724,
+      "grad_norm": 0.03704410046339035,
+      "learning_rate": 1.1020177413231334e-06,
+      "loss": 9.9829,
+      "step": 72
+    },
+    {
+      "epoch": 1.2901098901098902,
+      "grad_norm": 0.033814772963523865,
+      "learning_rate": 1.0305368692688175e-06,
+      "loss": 10.4889,
+      "step": 73
+    },
+    {
+      "epoch": 1.3076923076923077,
+      "grad_norm": 0.03551120683550835,
+      "learning_rate": 9.608463116858544e-07,
+      "loss": 10.3945,
+      "step": 74
+    },
+    {
+      "epoch": 1.3252747252747252,
+      "grad_norm": 0.03838565945625305,
+      "learning_rate": 8.930309757836517e-07,
+      "loss": 10.5222,
+      "step": 75
+    },
+    {
+      "epoch": 1.342857142857143,
+      "grad_norm": 0.03614303097128868,
+      "learning_rate": 8.271734841028553e-07,
+      "loss": 10.1321,
+      "step": 76
+    },
+    {
+      "epoch": 1.3604395604395605,
+      "grad_norm": 0.0375247560441494,
+      "learning_rate": 7.633540738525066e-07,
+      "loss": 10.3162,
+      "step": 77
+    },
+    {
+      "epoch": 1.378021978021978,
+      "grad_norm": 0.04089844599366188,
+      "learning_rate": 7.016504991533727e-07,
+      "loss": 10.7107,
+      "step": 78
+    },
+    {
+      "epoch": 1.3956043956043955,
+      "grad_norm": 0.040124256163835526,
+      "learning_rate": 6.421379363065142e-07,
+      "loss": 10.0535,
+      "step": 79
+    },
+    {
+      "epoch": 1.413186813186813,
+      "grad_norm": 0.03981036692857742,
+      "learning_rate": 5.848888922025553e-07,
+      "loss": 10.6221,
+      "step": 80
+    },
+    {
+      "epoch": 1.4307692307692308,
+      "grad_norm": 0.04783904552459717,
+      "learning_rate": 5.299731159831953e-07,
+      "loss": 10.1964,
+      "step": 81
+    },
+    {
+      "epoch": 1.4483516483516483,
+      "grad_norm": 0.044217731803655624,
+      "learning_rate": 4.774575140626317e-07,
+      "loss": 10.1716,
+      "step": 82
+    },
+    {
+      "epoch": 1.465934065934066,
+      "grad_norm": 0.05215898156166077,
+      "learning_rate": 4.27406068612396e-07,
+      "loss": 10.7019,
+      "step": 83
+    },
+    {
+      "epoch": 1.4835164835164836,
+      "grad_norm": 0.055684931576251984,
+      "learning_rate": 3.798797596089351e-07,
+      "loss": 10.6131,
+      "step": 84
+    },
+    {
+      "epoch": 1.501098901098901,
+      "grad_norm": 0.041437309235334396,
+      "learning_rate": 3.3493649053890325e-07,
+      "loss": 10.5622,
+      "step": 85
+    },
+    {
+      "epoch": 1.5186813186813186,
+      "grad_norm": 0.0326310358941555,
+      "learning_rate": 2.9263101785268253e-07,
+      "loss": 9.7426,
+      "step": 86
+    },
+    {
+      "epoch": 1.5362637362637361,
+      "grad_norm": 0.041171569377183914,
+      "learning_rate": 2.53014884252083e-07,
+      "loss": 10.4579,
+      "step": 87
+    },
+    {
+      "epoch": 1.5538461538461539,
+      "grad_norm": 0.03874897211790085,
+      "learning_rate": 2.1613635589349756e-07,
+      "loss": 10.3767,
+      "step": 88
+    },
+    {
+      "epoch": 1.5714285714285714,
+      "grad_norm": 0.03308764472603798,
+      "learning_rate": 1.8204036358303173e-07,
+      "loss": 10.1858,
+      "step": 89
+    },
+    {
+      "epoch": 1.5890109890109891,
+      "grad_norm": 0.0384545736014843,
+      "learning_rate": 1.507684480352292e-07,
+      "loss": 10.4582,
+      "step": 90
+    },
+    {
+      "epoch": 1.6065934065934067,
+      "grad_norm": 0.043392788618803024,
+      "learning_rate": 1.223587092621162e-07,
+      "loss": 10.7153,
+      "step": 91
+    },
+    {
+      "epoch": 1.6241758241758242,
+      "grad_norm": 0.03623654693365097,
+      "learning_rate": 9.684576015420277e-08,
+      "loss": 10.0836,
+      "step": 92
+    },
+    {
+      "epoch": 1.6417582417582417,
+      "grad_norm": 0.0385335274040699,
+      "learning_rate": 7.426068431000883e-08,
+      "loss": 10.1258,
+      "step": 93
+    },
+    {
+      "epoch": 1.6593406593406592,
+      "grad_norm": 0.04223882779479027,
+      "learning_rate": 5.463099816548578e-08,
+      "loss": 10.7705,
+      "step": 94
+    },
+    {
+      "epoch": 1.676923076923077,
+      "grad_norm": 0.03968479484319687,
+      "learning_rate": 3.798061746947995e-08,
+      "loss": 10.4866,
+      "step": 95
+    },
+    {
+      "epoch": 1.6945054945054945,
+      "grad_norm": 0.05118681862950325,
+      "learning_rate": 2.4329828146074096e-08,
+      "loss": 10.0814,
+      "step": 96
+    },
+    {
+      "epoch": 1.7120879120879122,
+      "grad_norm": 0.055021852254867554,
+      "learning_rate": 1.3695261579316776e-08,
+      "loss": 10.5808,
+      "step": 97
+    },
+    {
+      "epoch": 1.7296703296703297,
+      "grad_norm": 0.04585081338882446,
+      "learning_rate": 6.089874350439507e-09,
+      "loss": 10.5095,
+      "step": 98
+    },
+    {
+      "epoch": 1.7472527472527473,
+      "grad_norm": 0.03800305351614952,
+      "learning_rate": 1.5229324522605949e-09,
+      "loss": 10.2719,
+      "step": 99
+    },
+    {
+      "epoch": 1.7648351648351648,
+      "grad_norm": 0.040729913860559464,
+      "learning_rate": 0.0,
+      "loss": 10.2925,
+      "step": 100
+    },
+    {
+      "epoch": 1.7648351648351648,
+      "eval_loss": 10.366907119750977,
+      "eval_runtime": 0.1942,
+      "eval_samples_per_second": 494.268,
+      "eval_steps_per_second": 123.567,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 10460489318400.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null