Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe549ee9dada1f95ab93b5af457fac4eddf0fcd0ab26cd08cf33970906e4d343
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:c81ae76f139a6fbd49837ffb7db68ea4322593aca36914bf8d1400f3c251a992
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6b69102c0c036034527ee1dc14ae9b43bec35a05417b76a628d553987bfc18c
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:1baab1253bf30c89524e7753a1ecfb66892f5edf4c8cfcc05827ce5b9cc28184
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86550c2e58d601238381d1429c8db3d497ef4a0a8021121fc8233e9208262dc5
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:75ef0eea3c0bc854ba34b865b9558fc4ab4914a7ec715fbb2876475c10df66a0
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1b4de024c4d2e8af2ea52dd657832dfd45abf183bb8c54dc73f2991fd01f0db
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:29f781b7b0d8091e80c7d35044a0c0ef12279cbf5144a85f84372a55deac8620
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3a3de787a3446796079b250eab43da785b80632640168d2ea13d041b696665c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cfe3f4ea1ace9e924b6ebf174c05335ece2b936b47425b4617208c6fbd02ee0
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:068bf3317cb90ffdd048ebc7c5b93cd6ebc300a3756d03eda522baded98870eb
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:217ea22f993c4c1725f45993693d32379413c9e141202ed40e7e298f40e289a5
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 3.6008565425872803,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.1363636363636362,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 10.256,
       "eval_steps_per_second": 2.667,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.472678670261289e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 3.5564169883728027,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 2.2727272727272725,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.256,
       "eval_steps_per_second": 2.667,
       "step": 25
+    },
+    {
+      "epoch": 1.1818181818181819,
+      "grad_norm": 0.4389213025569916,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 3.4342,
+      "step": 26
+    },
+    {
+      "epoch": 1.2272727272727273,
+      "grad_norm": 0.506675124168396,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 3.5071,
+      "step": 27
+    },
+    {
+      "epoch": 1.2727272727272727,
+      "grad_norm": 0.33202725648880005,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 3.2895,
+      "step": 28
+    },
+    {
+      "epoch": 1.3181818181818181,
+      "grad_norm": 0.41267460584640503,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 3.2902,
+      "step": 29
+    },
+    {
+      "epoch": 1.3636363636363638,
+      "grad_norm": 0.3891868591308594,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 3.2911,
+      "step": 30
+    },
+    {
+      "epoch": 1.4090909090909092,
+      "grad_norm": 0.40933117270469666,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 3.4391,
+      "step": 31
+    },
+    {
+      "epoch": 1.4545454545454546,
+      "grad_norm": 0.4993307888507843,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 3.4417,
+      "step": 32
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 0.355300635099411,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 3.3338,
+      "step": 33
+    },
+    {
+      "epoch": 1.5454545454545454,
+      "grad_norm": 0.3318493366241455,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 3.2812,
+      "step": 34
+    },
+    {
+      "epoch": 1.5909090909090908,
+      "grad_norm": 0.3499109148979187,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 3.2823,
+      "step": 35
+    },
+    {
+      "epoch": 1.6363636363636362,
+      "grad_norm": 0.3723719120025635,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 3.3055,
+      "step": 36
+    },
+    {
+      "epoch": 1.6818181818181817,
+      "grad_norm": 0.46838635206222534,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 3.4007,
+      "step": 37
+    },
+    {
+      "epoch": 1.7272727272727273,
+      "grad_norm": 0.3957988917827606,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 3.3339,
+      "step": 38
+    },
+    {
+      "epoch": 1.7727272727272727,
+      "grad_norm": 0.2677713632583618,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 3.2229,
+      "step": 39
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 0.30480334162712097,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 3.2728,
+      "step": 40
+    },
+    {
+      "epoch": 1.8636363636363638,
+      "grad_norm": 0.3701198995113373,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 3.3106,
+      "step": 41
+    },
+    {
+      "epoch": 1.9090909090909092,
+      "grad_norm": 0.4461487829685211,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 3.4646,
+      "step": 42
+    },
+    {
+      "epoch": 1.9545454545454546,
+      "grad_norm": 0.61518394947052,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 3.5355,
+      "step": 43
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.3342447578907013,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 3.1614,
+      "step": 44
+    },
+    {
+      "epoch": 2.0454545454545454,
+      "grad_norm": 0.25999006628990173,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 3.229,
+      "step": 45
+    },
+    {
+      "epoch": 2.090909090909091,
+      "grad_norm": 0.3094271719455719,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 3.2783,
+      "step": 46
+    },
+    {
+      "epoch": 2.1363636363636362,
+      "grad_norm": 0.3765052855014801,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 3.1472,
+      "step": 47
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": 0.4353832006454468,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 3.3006,
+      "step": 48
+    },
+    {
+      "epoch": 2.227272727272727,
+      "grad_norm": 0.509846031665802,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 3.3415,
+      "step": 49
+    },
+    {
+      "epoch": 2.2727272727272725,
+      "grad_norm": 0.2852264940738678,
+      "learning_rate": 1e-05,
+      "loss": 3.2566,
+      "step": 50
+    },
+    {
+      "epoch": 2.2727272727272725,
+      "eval_loss": 3.5564169883728027,
+      "eval_runtime": 4.8889,
+      "eval_samples_per_second": 10.227,
+      "eval_steps_per_second": 2.659,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.94535732978516e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null