Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:909eaa4f3491722adaa74b16cb1a21a6b43de61232737c8d2d359f7c5b8a3736
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:20de42903767c3c9fdfe086367090dfe983c906fd37b3a26f120395a7b08a7f5
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6aa979e3f4cf0aef003f335227f57ca0fdc014dc477536b8ded0e89f916b68b3
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:af6e5e78e02dadd20b7558e033a58155c844c4917377e7ce98697a3a03ea1288
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4918b23f35d0d3a6980749823af3d79c679b78eebde21d17a638cd051ac8536d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b36c68576797e56da06c6ecfb4fbbb58c997f36d8929545ffb55aec00b2b7cf
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c55ccf9d954e6aca90d63eafe32fa046ba8821ba4b6cad7da2e1427b60bfde47
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:367d583d2484ab2199903ca9de9c21eda09eaf333a27876f8aefb621b0ada8e5
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b39539405562e087712e4549e8ea336df483d73af448c18ba9fbc4609a742a0
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f29c9762a62d7fe4ac47aafaf7b9e2591b5fc593e796eaa6512450c88acbf6e1
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4851f857e960c583d1e143047b0cf2b82b834d1e5d909d29ef61d632ed74c16c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c2a2b294fc63a4154ca178630c641eb02c4ec1a28e8986866658181a51fa93f
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3213779330253601,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.4483985765124556,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 38.472,
       "eval_steps_per_second": 4.891,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.829010669142016e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.097114197909832,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 2.8967971530249113,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 38.472,
       "eval_steps_per_second": 4.891,
       "step": 25
+    },
+    {
+      "epoch": 1.5053380782918149,
+      "grad_norm": 143.5742645263672,
+      "learning_rate": 5e-05,
+      "loss": 4.1581,
+      "step": 26
+    },
+    {
+      "epoch": 1.5622775800711743,
+      "grad_norm": 76.80440521240234,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 1.616,
+      "step": 27
+    },
+    {
+      "epoch": 1.6192170818505338,
+      "grad_norm": 118.89835357666016,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 3.2303,
+      "step": 28
+    },
+    {
+      "epoch": 1.6761565836298933,
+      "grad_norm": 94.23796081542969,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 1.5968,
+      "step": 29
+    },
+    {
+      "epoch": 1.7330960854092528,
+      "grad_norm": 114.2875747680664,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 3.7156,
+      "step": 30
+    },
+    {
+      "epoch": 1.790035587188612,
+      "grad_norm": 67.07085418701172,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 1.808,
+      "step": 31
+    },
+    {
+      "epoch": 1.8469750889679717,
+      "grad_norm": 83.40116882324219,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 2.7499,
+      "step": 32
+    },
+    {
+      "epoch": 1.903914590747331,
+      "grad_norm": 71.34392547607422,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 1.3756,
+      "step": 33
+    },
+    {
+      "epoch": 1.9608540925266904,
+      "grad_norm": 50.69999313354492,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 1.4092,
+      "step": 34
+    },
+    {
+      "epoch": 2.0427046263345194,
+      "grad_norm": 36.84391784667969,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 1.0828,
+      "step": 35
+    },
+    {
+      "epoch": 2.099644128113879,
+      "grad_norm": 33.99079513549805,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.8324,
+      "step": 36
+    },
+    {
+      "epoch": 2.1565836298932384,
+      "grad_norm": 29.23866844177246,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.7622,
+      "step": 37
+    },
+    {
+      "epoch": 2.213523131672598,
+      "grad_norm": 48.40800476074219,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.9082,
+      "step": 38
+    },
+    {
+      "epoch": 2.2704626334519573,
+      "grad_norm": 32.54946517944336,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.5952,
+      "step": 39
+    },
+    {
+      "epoch": 2.3274021352313166,
+      "grad_norm": 26.96776580810547,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.3644,
+      "step": 40
+    },
+    {
+      "epoch": 2.3843416370106763,
+      "grad_norm": 18.696626663208008,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.3476,
+      "step": 41
+    },
+    {
+      "epoch": 2.4412811387900355,
+      "grad_norm": 9.09317398071289,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.2057,
+      "step": 42
+    },
+    {
+      "epoch": 2.498220640569395,
+      "grad_norm": 52.68156433105469,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.684,
+      "step": 43
+    },
+    {
+      "epoch": 2.5551601423487544,
+      "grad_norm": 29.05806541442871,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.4977,
+      "step": 44
+    },
+    {
+      "epoch": 2.612099644128114,
+      "grad_norm": 17.673227310180664,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.2543,
+      "step": 45
+    },
+    {
+      "epoch": 2.6690391459074734,
+      "grad_norm": 13.347757339477539,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.1296,
+      "step": 46
+    },
+    {
+      "epoch": 2.7259786476868326,
+      "grad_norm": 30.75640869140625,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.4859,
+      "step": 47
+    },
+    {
+      "epoch": 2.7829181494661923,
+      "grad_norm": 51.863101959228516,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.6726,
+      "step": 48
+    },
+    {
+      "epoch": 2.8398576512455516,
+      "grad_norm": 27.79411506652832,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.2724,
+      "step": 49
+    },
+    {
+      "epoch": 2.8967971530249113,
+      "grad_norm": 25.98094367980957,
+      "learning_rate": 0.0,
+      "loss": 0.5203,
+      "step": 50
+    },
+    {
+      "epoch": 2.8967971530249113,
+      "eval_loss": 0.097114197909832,
+      "eval_runtime": 3.0654,
+      "eval_samples_per_second": 38.494,
+      "eval_steps_per_second": 4.893,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.658021338284032e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null