Training in progress, step 116, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a499d923fc8b0362c81987e3172fbcd093b0effe5bdc768e70faf0811be15f5e
 size 50358592

 version https://git-lfs.github.com/spec/v1
+oid sha256:56b2f65a1d089ee66fc7ea728b1874e441b68e560eb21bbff05c54959e5e3f21
 size 50358592

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc86c2c3b62f1125d16831eb4312c9bd1507cc6ded424140a435b9659edc365e
 size 100824826

 version https://git-lfs.github.com/spec/v1
+oid sha256:390428e84d5e9a0ce532250c5fdf92c062df738731de6205f526a446d7a1f935
 size 100824826

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3603ae0f461c0309918a469bb88361702e8fe7031d469296ef29915e59cd15f4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b51b4320dedee1d321d6d72d7aad1566ba8a70f284d4fc872e4d20473231aca4
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95019e029f304009516750a4bbe05ba42bcbfeab090e08f3a47061c7683127e4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:14579bee543d59d3eada9977676f27cb9d72db0c545f2552d8a0908f37d7fc75
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c93ea150dedd152785349606801d73a50b174319e11bd7bc4c752090cefb4196
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b25c181224c6a64505fbebbb62ba1315e8db34d2563446a817bb262831c728bb
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5e3858bb07bc0e1a65c01d5084480d194b61020c06fc22f6fa0708b202f0e34
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:45b78dae73959737264e6eed60625c40d91131768deebb2ac98a0e465f131b70
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba08b4a1855b48cf82b864725b4104527747dc0aacfb5a9d0a509cb25e565a06
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d46fb3cc6e0ccde808ead2f08288d1a1004041779e54159d7604f576ec806133
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.6968957781791687,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.5901374292643493,
   "eval_steps": 25,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -747,6 +747,118 @@
       "eval_samples_per_second": 79.714,
       "eval_steps_per_second": 3.189,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -770,12 +882,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.020403094913024e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.6968957781791687,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.005658852061439,
   "eval_steps": 25,
+  "global_step": 116,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 79.714,
       "eval_steps_per_second": 3.189,
       "step": 100
+    },
+    {
+      "epoch": 2.6160064672594987,
+      "grad_norm": 18.59197235107422,
+      "learning_rate": 1.4580325505138468e-05,
+      "loss": 22.2291,
+      "step": 101
+    },
+    {
+      "epoch": 2.6418755052546485,
+      "grad_norm": 8.891879081726074,
+      "learning_rate": 1.272815741354723e-05,
+      "loss": 22.3545,
+      "step": 102
+    },
+    {
+      "epoch": 2.667744543249798,
+      "grad_norm": 8.696002006530762,
+      "learning_rate": 1.0996558885224993e-05,
+      "loss": 22.1393,
+      "step": 103
+    },
+    {
+      "epoch": 2.6936135812449473,
+      "grad_norm": 6.413660049438477,
+      "learning_rate": 9.387050829881865e-06,
+      "loss": 22.9287,
+      "step": 104
+    },
+    {
+      "epoch": 2.719482619240097,
+      "grad_norm": 13.523515701293945,
+      "learning_rate": 7.90104692187129e-06,
+      "loss": 22.8497,
+      "step": 105
+    },
+    {
+      "epoch": 2.7453516572352465,
+      "grad_norm": 8.049901008605957,
+      "learning_rate": 6.539852358521636e-06,
+      "loss": 22.0333,
+      "step": 106
+    },
+    {
+      "epoch": 2.7712206952303964,
+      "grad_norm": 8.899979591369629,
+      "learning_rate": 5.304662713746205e-06,
+      "loss": 22.1953,
+      "step": 107
+    },
+    {
+      "epoch": 2.7970897332255458,
+      "grad_norm": 9.008318901062012,
+      "learning_rate": 4.1965628879383875e-06,
+      "loss": 22.1504,
+      "step": 108
+    },
+    {
+      "epoch": 2.822958771220695,
+      "grad_norm": 13.858719825744629,
+      "learning_rate": 3.2165261550743946e-06,
+      "loss": 22.0938,
+      "step": 109
+    },
+    {
+      "epoch": 2.8488278092158446,
+      "grad_norm": 6.062250137329102,
+      "learning_rate": 2.3654133078604753e-06,
+      "loss": 22.1504,
+      "step": 110
+    },
+    {
+      "epoch": 2.8746968472109944,
+      "grad_norm": 5.169662952423096,
+      "learning_rate": 1.643971901675395e-06,
+      "loss": 22.1182,
+      "step": 111
+    },
+    {
+      "epoch": 2.900565885206144,
+      "grad_norm": 9.194791793823242,
+      "learning_rate": 1.0528355979724624e-06,
+      "loss": 22.0225,
+      "step": 112
+    },
+    {
+      "epoch": 2.9264349232012936,
+      "grad_norm": 5.832217693328857,
+      "learning_rate": 5.925236077174655e-07,
+      "loss": 22.2256,
+      "step": 113
+    },
+    {
+      "epoch": 2.952303961196443,
+      "grad_norm": 3.4554474353790283,
+      "learning_rate": 2.634402353517973e-07,
+      "loss": 22.0733,
+      "step": 114
+    },
+    {
+      "epoch": 2.9781729991915924,
+      "grad_norm": 10.36470890045166,
+      "learning_rate": 6.587452368084779e-08,
+      "loss": 22.0811,
+      "step": 115
+    },
+    {
+      "epoch": 3.005658852061439,
+      "grad_norm": 10.728325843811035,
+      "learning_rate": 0.0,
+      "loss": 21.647,
+      "step": 116
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1836675900991078e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null