Training in progress, step 46, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +151 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc3168d5bf514e4885b6e429832adfd0ee61586dda145c122c868799a329b886
 size 400084608

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4c31dd7b3c3db76c8fc36333507706d635b1f616c41aee12360218a048b6dc6
 size 400084608

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1d3e5bed95ab058d70f9d1495cbaf4610a6edaacefc24a4e4d83a19d239addd
 size 800394282

 version https://git-lfs.github.com/spec/v1
+oid sha256:2562c5ddcea9945912cb2a946347ffebfefa0851f799bb5133f4733d3ce6bf2a
 size 800394282

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:102554df4885f31f197ed10706d5368fa3b241d2411a9ddb16a089ee407e85c6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:70cb8f79625e98547b5d8e950ddc4d0340026c84e5b444b07c6fc33c9705be7d
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:507c77fa099f7b8aa6ca37bb0d50ab18a20750fb723ba6f63f57fa07330402b7
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e52ba6d8adf0e9e4728fa240e5f5072f687ff7645d061fcae8499b664d12162
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40dab700699d738f8cbb9ba2f1d013bfe9663086d5af235f18a4583da5f1d08e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0357c2adcf10164d42761f9017d93e7c157c96368d0afcbcd479d8be5c126b07
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1971faa6f67476b6e3931b051c4402a8a06810f774eec759dc261f8842769bab
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c67f05a2904f1e921304078248ef4709a735a7ed311915a096903496b37b7df
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad4e4728265849a5c8f503e18c55a53f2fb6db704986db6665c2db1ebdbba252
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ae5239404ab97e0fcf1d64e94bd56d195792c49faa00afb9e436068b35b4aef
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.29814502596855164,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 1.6528925619834711,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,153 @@
       "eval_samples_per_second": 10.323,
       "eval_steps_per_second": 2.684,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -221,12 +368,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.087239672233984e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.29814502596855164,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 3.041322314049587,
   "eval_steps": 25,
+  "global_step": 46,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.323,
       "eval_steps_per_second": 2.684,
       "step": 25
+    },
+    {
+      "epoch": 1.71900826446281,
+      "grad_norm": 0.38563114404678345,
+      "learning_rate": 4.859583227770218e-05,
+      "loss": 0.1666,
+      "step": 26
+    },
+    {
+      "epoch": 1.7851239669421488,
+      "grad_norm": 0.32108181715011597,
+      "learning_rate": 4.543456197011605e-05,
+      "loss": 0.185,
+      "step": 27
+    },
+    {
+      "epoch": 1.8512396694214877,
+      "grad_norm": 0.3681396245956421,
+      "learning_rate": 4.232203494213567e-05,
+      "loss": 0.2199,
+      "step": 28
+    },
+    {
+      "epoch": 1.9173553719008265,
+      "grad_norm": 0.37222185730934143,
+      "learning_rate": 3.927411191804058e-05,
+      "loss": 0.2032,
+      "step": 29
+    },
+    {
+      "epoch": 1.9834710743801653,
+      "grad_norm": 0.4296535551548004,
+      "learning_rate": 3.630632441491512e-05,
+      "loss": 0.1851,
+      "step": 30
+    },
+    {
+      "epoch": 2.049586776859504,
+      "grad_norm": 0.8122077584266663,
+      "learning_rate": 3.343379559759746e-05,
+      "loss": 0.2605,
+      "step": 31
+    },
+    {
+      "epoch": 2.115702479338843,
+      "grad_norm": 0.29376327991485596,
+      "learning_rate": 3.067116321449813e-05,
+      "loss": 0.1641,
+      "step": 32
+    },
+    {
+      "epoch": 2.1818181818181817,
+      "grad_norm": 0.3262518048286438,
+      "learning_rate": 2.803250500698939e-05,
+      "loss": 0.1464,
+      "step": 33
+    },
+    {
+      "epoch": 2.2479338842975207,
+      "grad_norm": 0.2500488758087158,
+      "learning_rate": 2.5531266972462177e-05,
+      "loss": 0.0992,
+      "step": 34
+    },
+    {
+      "epoch": 2.3140495867768593,
+      "grad_norm": 0.3410912752151489,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 0.1822,
+      "step": 35
+    },
+    {
+      "epoch": 2.3801652892561984,
+      "grad_norm": 0.28179067373275757,
+      "learning_rate": 2.0991269154058385e-05,
+      "loss": 0.1224,
+      "step": 36
+    },
+    {
+      "epoch": 2.446280991735537,
+      "grad_norm": 0.30862489342689514,
+      "learning_rate": 1.897564415840379e-05,
+      "loss": 0.1116,
+      "step": 37
+    },
+    {
+      "epoch": 2.512396694214876,
+      "grad_norm": 0.2770218253135681,
+      "learning_rate": 1.7143591022596845e-05,
+      "loss": 0.1074,
+      "step": 38
+    },
+    {
+      "epoch": 2.5785123966942147,
+      "grad_norm": 0.3372029662132263,
+      "learning_rate": 1.5504445469473496e-05,
+      "loss": 0.1526,
+      "step": 39
+    },
+    {
+      "epoch": 2.644628099173554,
+      "grad_norm": 0.3446227014064789,
+      "learning_rate": 1.4066560209046673e-05,
+      "loss": 0.1372,
+      "step": 40
+    },
+    {
+      "epoch": 2.7107438016528924,
+      "grad_norm": 0.34640777111053467,
+      "learning_rate": 1.2837262375010731e-05,
+      "loss": 0.1075,
+      "step": 41
+    },
+    {
+      "epoch": 2.7768595041322315,
+      "grad_norm": 0.30039989948272705,
+      "learning_rate": 1.1822816187347623e-05,
+      "loss": 0.1153,
+      "step": 42
+    },
+    {
+      "epoch": 2.84297520661157,
+      "grad_norm": 0.34811943769454956,
+      "learning_rate": 1.1028391031297826e-05,
+      "loss": 0.1557,
+      "step": 43
+    },
+    {
+      "epoch": 2.909090909090909,
+      "grad_norm": 0.34190842509269714,
+      "learning_rate": 1.0458035115358032e-05,
+      "loss": 0.1301,
+      "step": 44
+    },
+    {
+      "epoch": 2.975206611570248,
+      "grad_norm": 0.3699391782283783,
+      "learning_rate": 1.0114654842538593e-05,
+      "loss": 0.0992,
+      "step": 45
+    },
+    {
+      "epoch": 3.041322314049587,
+      "grad_norm": 0.8737526535987854,
+      "learning_rate": 1e-05,
+      "loss": 0.2512,
+      "step": 46
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.6805209969105306e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null