Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de16c443b4647ceb5e30d1ddf8c9deb53202ccce4b7aa64b8a426c6af4ddd48c
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:461b8d10c9a3316e1759c0939d237ea718017295cfb9ffb180b28bff171ae3e8
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd3484d7b47dbafd231bb94fdc316caad0464e576e21797b4ecb2e541b1c5c79
 size 180543866

 version https://git-lfs.github.com/spec/v1
+oid sha256:29ccf389cd7a4bcc74f7581b3c63b9628f198607c6cc019e45c7e7b7e7671095
 size 180543866

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e3abfcd2ec48e75e553b203bd47ebd5e3dabdfac6664e075a305886c8b7ed85
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d66021f5caba8057c6ecf5d4d5d35171bc5e87686a19c38c2b8f606b7b1c24a4
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a70cf6ecc81ec51bc40b7c93886c98a2ecb312c087a4c9561f0cf74a4b522232
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0eaa4155fd776d4c4a4eaa1a0df919a1718e600694a5558a1d235511dd77ad96
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d8bdbd7e85cde52b7547db495c90d9a186118046ff6e363ee566dfc4e3c6c8d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8055ec87d10583b19c190a67930301fb3523c677fa6ec3bea83cf2ccb48e16a6
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f93479ecb6f4e69cbb238b7a546a3f70bc5b3f1a8421831c75cb7ec9450f372
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4f3e4475a015fef5bbedf596c2abcc404b1de33fae1326853d93e886aaa5a0c
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3310394585132599,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.23923444976076555,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 127.632,
       "eval_steps_per_second": 15.954,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.917661344681165e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.2678118944168091,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.4784688995215311,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 127.632,
       "eval_steps_per_second": 15.954,
       "step": 25
+    },
+    {
+      "epoch": 0.24880382775119617,
+      "grad_norm": 0.550075113773346,
+      "learning_rate": 5e-05,
+      "loss": 0.2946,
+      "step": 26
+    },
+    {
+      "epoch": 0.2583732057416268,
+      "grad_norm": 0.6220561265945435,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.3003,
+      "step": 27
+    },
+    {
+      "epoch": 0.2679425837320574,
+      "grad_norm": 0.550743579864502,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.2996,
+      "step": 28
+    },
+    {
+      "epoch": 0.27751196172248804,
+      "grad_norm": 0.5144545435905457,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.3006,
+      "step": 29
+    },
+    {
+      "epoch": 0.28708133971291866,
+      "grad_norm": 0.5131386518478394,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.2989,
+      "step": 30
+    },
+    {
+      "epoch": 0.2966507177033493,
+      "grad_norm": 0.5182365775108337,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.2806,
+      "step": 31
+    },
+    {
+      "epoch": 0.3062200956937799,
+      "grad_norm": 0.5407494306564331,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.2921,
+      "step": 32
+    },
+    {
+      "epoch": 0.3157894736842105,
+      "grad_norm": 0.5603722929954529,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.281,
+      "step": 33
+    },
+    {
+      "epoch": 0.3253588516746411,
+      "grad_norm": 0.532714307308197,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.26,
+      "step": 34
+    },
+    {
+      "epoch": 0.3349282296650718,
+      "grad_norm": 0.5404855012893677,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.2831,
+      "step": 35
+    },
+    {
+      "epoch": 0.3444976076555024,
+      "grad_norm": 0.7140843272209167,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.2761,
+      "step": 36
+    },
+    {
+      "epoch": 0.35406698564593303,
+      "grad_norm": 1.1513055562973022,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.272,
+      "step": 37
+    },
+    {
+      "epoch": 0.36363636363636365,
+      "grad_norm": 0.7768696546554565,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.2692,
+      "step": 38
+    },
+    {
+      "epoch": 0.37320574162679426,
+      "grad_norm": 0.4334043860435486,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.263,
+      "step": 39
+    },
+    {
+      "epoch": 0.3827751196172249,
+      "grad_norm": 0.4151334762573242,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.265,
+      "step": 40
+    },
+    {
+      "epoch": 0.3923444976076555,
+      "grad_norm": 0.46699267625808716,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.2688,
+      "step": 41
+    },
+    {
+      "epoch": 0.4019138755980861,
+      "grad_norm": 0.4230937063694,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.2582,
+      "step": 42
+    },
+    {
+      "epoch": 0.41148325358851673,
+      "grad_norm": 0.4443204402923584,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.2508,
+      "step": 43
+    },
+    {
+      "epoch": 0.42105263157894735,
+      "grad_norm": 0.501763105392456,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.2547,
+      "step": 44
+    },
+    {
+      "epoch": 0.430622009569378,
+      "grad_norm": 0.6074100732803345,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.2675,
+      "step": 45
+    },
+    {
+      "epoch": 0.44019138755980863,
+      "grad_norm": 0.5926224589347839,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.2775,
+      "step": 46
+    },
+    {
+      "epoch": 0.44976076555023925,
+      "grad_norm": 0.6422264575958252,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.2847,
+      "step": 47
+    },
+    {
+      "epoch": 0.45933014354066987,
+      "grad_norm": 0.6994796395301819,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.2657,
+      "step": 48
+    },
+    {
+      "epoch": 0.4688995215311005,
+      "grad_norm": 0.72403484582901,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.2674,
+      "step": 49
+    },
+    {
+      "epoch": 0.4784688995215311,
+      "grad_norm": 0.8881353139877319,
+      "learning_rate": 0.0,
+      "loss": 0.2547,
+      "step": 50
+    },
+    {
+      "epoch": 0.4784688995215311,
+      "eval_loss": 0.2678118944168091,
+      "eval_runtime": 5.5157,
+      "eval_samples_per_second": 127.637,
+      "eval_steps_per_second": 15.955,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.832875685432525e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null