Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +189 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:420368af9180ab0f2b15f5db383d1e56829c45001d761731b7d383324dce7d25
 size 335604696

 version https://git-lfs.github.com/spec/v1
+oid sha256:e03b63496db385d0b3805ed508b7882ac461cb3ffdd20be586b64bc8ccc8f6e0
 size 335604696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21595373ac8685232742187189922849a2da52e507765b8c952238654856b1f0
 size 671466706

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdd1214062ebd8a623f17d467bae6d49c07adbd29c222bc84532f936f63195cf
 size 671466706

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e5e49315a443925334d3969d2f4a3d08aed2bdb7c8da03f96f6db8df45fdeb2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:782356d12bcc7d04b4aee86f6a86abac4e08249de31fee6dc6362bae9935ecb7
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:25a43a3a86d652768821ff12957547b4f0a61a9409b9f65c199187423987ad58
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e544353357880f9680217caf553a7ffd8e3cd4ba7b336f88a0b351710ac6db9b
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2cc24efa91f79dc2fc386c73a64cbd104609d77c9faec8740b270d5a9609ad7
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0c3054ce8841059f7044850b722f9e66fe73623ed7a4363aadcda71bc3d039e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2251383c003419fa8563aeee38d09dd4ef7b9708012a882899b2e0a3ed159f7d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7933cf4c44d97e1cfb44e202e79639240ff05f07a0d35ea078a6d67a8619f48d
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.4649864137172699,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.030677199171715623,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 32.78,
       "eval_steps_per_second": 4.102,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.9859068337822106e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.4557400643825531,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.061354398343431246,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 32.78,
       "eval_steps_per_second": 4.102,
       "step": 25
+    },
+    {
+      "epoch": 0.031904287138584245,
+      "grad_norm": 0.10585936903953552,
+      "learning_rate": 5e-05,
+      "loss": 0.3251,
+      "step": 26
+    },
+    {
+      "epoch": 0.03313137510545287,
+      "grad_norm": 0.18751554191112518,
+      "learning_rate": 4.6729843538492847e-05,
+      "loss": 0.4752,
+      "step": 27
+    },
+    {
+      "epoch": 0.034358463072321495,
+      "grad_norm": 0.21235325932502747,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 0.5146,
+      "step": 28
+    },
+    {
+      "epoch": 0.03558555103919012,
+      "grad_norm": 0.20149441063404083,
+      "learning_rate": 4.0245483899193595e-05,
+      "loss": 0.5263,
+      "step": 29
+    },
+    {
+      "epoch": 0.036812639006058745,
+      "grad_norm": 0.17985205352306366,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 0.5507,
+      "step": 30
+    },
+    {
+      "epoch": 0.038039726972927373,
+      "grad_norm": 0.1699896901845932,
+      "learning_rate": 3.392802673484193e-05,
+      "loss": 0.5189,
+      "step": 31
+    },
+    {
+      "epoch": 0.039266814939795995,
+      "grad_norm": 0.143082857131958,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 0.5119,
+      "step": 32
+    },
+    {
+      "epoch": 0.040493902906664624,
+      "grad_norm": 0.15867877006530762,
+      "learning_rate": 2.7885565489049946e-05,
+      "loss": 0.4714,
+      "step": 33
+    },
+    {
+      "epoch": 0.041720990873533245,
+      "grad_norm": 0.18457220494747162,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.4246,
+      "step": 34
+    },
+    {
+      "epoch": 0.042948078840401874,
+      "grad_norm": 0.18785923719406128,
+      "learning_rate": 2.2221488349019903e-05,
+      "loss": 0.4165,
+      "step": 35
+    },
+    {
+      "epoch": 0.044175166807270495,
+      "grad_norm": 0.20624688267707825,
+      "learning_rate": 1.9561928549563968e-05,
+      "loss": 0.3954,
+      "step": 36
+    },
+    {
+      "epoch": 0.045402254774139124,
+      "grad_norm": 0.2417033463716507,
+      "learning_rate": 1.703270924499656e-05,
+      "loss": 0.3974,
+      "step": 37
+    },
+    {
+      "epoch": 0.046629342741007745,
+      "grad_norm": 0.12633183598518372,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 0.2882,
+      "step": 38
+    },
+    {
+      "epoch": 0.047856430707876374,
+      "grad_norm": 0.12885819375514984,
+      "learning_rate": 1.2408009626051137e-05,
+      "loss": 0.419,
+      "step": 39
+    },
+    {
+      "epoch": 0.049083518674744996,
+      "grad_norm": 0.15089887380599976,
+      "learning_rate": 1.0332332985438248e-05,
+      "loss": 0.5052,
+      "step": 40
+    },
+    {
+      "epoch": 0.050310606641613624,
+      "grad_norm": 0.15001189708709717,
+      "learning_rate": 8.426519384872733e-06,
+      "loss": 0.5051,
+      "step": 41
+    },
+    {
+      "epoch": 0.051537694608482246,
+      "grad_norm": 0.15896552801132202,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.5382,
+      "step": 42
+    },
+    {
+      "epoch": 0.05276478257535087,
+      "grad_norm": 0.1471940577030182,
+      "learning_rate": 5.156362923365588e-06,
+      "loss": 0.5069,
+      "step": 43
+    },
+    {
+      "epoch": 0.053991870542219496,
+      "grad_norm": 0.15487565100193024,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 0.5298,
+      "step": 44
+    },
+    {
+      "epoch": 0.05521895850908812,
+      "grad_norm": 0.1594659984111786,
+      "learning_rate": 2.653493525244721e-06,
+      "loss": 0.5101,
+      "step": 45
+    },
+    {
+      "epoch": 0.056446046475956746,
+      "grad_norm": 0.1511322557926178,
+      "learning_rate": 1.70370868554659e-06,
+      "loss": 0.4587,
+      "step": 46
+    },
+    {
+      "epoch": 0.05767313444282537,
+      "grad_norm": 0.1548001766204834,
+      "learning_rate": 9.607359798384785e-07,
+      "loss": 0.4077,
+      "step": 47
+    },
+    {
+      "epoch": 0.058900222409693996,
+      "grad_norm": 0.16782091557979584,
+      "learning_rate": 4.277569313094809e-07,
+      "loss": 0.3926,
+      "step": 48
+    },
+    {
+      "epoch": 0.06012731037656262,
+      "grad_norm": 0.2028161883354187,
+      "learning_rate": 1.0705383806982606e-07,
+      "loss": 0.3914,
+      "step": 49
+    },
+    {
+      "epoch": 0.061354398343431246,
+      "grad_norm": 0.3365857005119324,
+      "learning_rate": 0.0,
+      "loss": 0.3852,
+      "step": 50
+    },
+    {
+      "epoch": 0.061354398343431246,
+      "eval_loss": 0.4557400643825531,
+      "eval_runtime": 167.4314,
+      "eval_samples_per_second": 32.79,
+      "eval_steps_per_second": 4.103,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.969948642440643e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null