Training in progress, step 75, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2b1c34a4b80d1ad3d9bab875327f8d0356113793f547447d5e142004ed91bed
 size 156926880

 version https://git-lfs.github.com/spec/v1
+oid sha256:fcdc416252bb33147fef82f0103506f795f4f99e4773a3a95d3623a05d0b32c5
 size 156926880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff7b4d6e5bb3202587d85c17cfa5f152b9d43559387d851c8a97670bb1c55c90
 size 313998650

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5b6bdaaee4c57f59e32c951fde0b7f42040841f6bffa7d6b02b1fb918e24e3f
 size 313998650

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3913249c7030fd4849795db814a510e322d4e67dbc8d7d8502df7485aac26334
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:20d70d9305944d16e1bb0140d1a45740ff9c558e3344aae1a0d88f2f59da68dc
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bed0a9412b6595f9d37ff761f0a96b8b4cdd7868bd260c0925ef7e4911f74ae9
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b46bdc1a68b7b880812c047fb6f2f92bd410ebb1e51b0cda89a0cf144bbbed6
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f78941e0377783e626cab4746fc84741b5f8e34159246297280e2b604d274d6
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f75f3aa3bda8cf06df039be01a035b269578bc3fa42636e7793bdb0124c8ba0
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84aa212b501f7b9dff0c6b58c8a6079f60b49f3eddbc582b992711ca1781ffdc
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:527e8436e75881ea1e88e09daa27ffb1e8d3301ec45158f48be7280b087324ac
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3192059420211edc26748a07e2699e00ee3be65f92a2cf84ef56d22611d9eb5c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8d5f446d81df8b8e6d5d3423a874831fec3d08bffbe4980db71b54ceb3e7bd4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.6151813268661499,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.46565774155995343,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,189 @@
       "eval_samples_per_second": 86.174,
       "eval_steps_per_second": 5.419,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +592,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.178927560327168e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.587303638458252,
+  "best_model_checkpoint": "miner_id_24/checkpoint-75",
+  "epoch": 0.6984866123399301,
   "eval_steps": 25,
+  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 86.174,
       "eval_steps_per_second": 5.419,
       "step": 50
+    },
+    {
+      "epoch": 0.47497089639115253,
+      "grad_norm": 2.323246717453003,
+      "learning_rate": 5.247918773366112e-05,
+      "loss": 0.6403,
+      "step": 51
+    },
+    {
+      "epoch": 0.4842840512223516,
+      "grad_norm": 1.077490210533142,
+      "learning_rate": 5.0826697238317935e-05,
+      "loss": 0.5904,
+      "step": 52
+    },
+    {
+      "epoch": 0.4935972060535506,
+      "grad_norm": 0.8611325621604919,
+      "learning_rate": 4.917330276168208e-05,
+      "loss": 0.5742,
+      "step": 53
+    },
+    {
+      "epoch": 0.5029103608847497,
+      "grad_norm": 1.0277608633041382,
+      "learning_rate": 4.7520812266338885e-05,
+      "loss": 0.594,
+      "step": 54
+    },
+    {
+      "epoch": 0.5122235157159488,
+      "grad_norm": 1.1187914609909058,
+      "learning_rate": 4.5871032726383386e-05,
+      "loss": 0.5995,
+      "step": 55
+    },
+    {
+      "epoch": 0.5215366705471478,
+      "grad_norm": 0.8779079914093018,
+      "learning_rate": 4.4225768151520694e-05,
+      "loss": 0.602,
+      "step": 56
+    },
+    {
+      "epoch": 0.5308498253783469,
+      "grad_norm": 0.9306710362434387,
+      "learning_rate": 4.2586817614407895e-05,
+      "loss": 0.5815,
+      "step": 57
+    },
+    {
+      "epoch": 0.540162980209546,
+      "grad_norm": 1.022081971168518,
+      "learning_rate": 4.095597328339452e-05,
+      "loss": 0.6049,
+      "step": 58
+    },
+    {
+      "epoch": 0.5494761350407451,
+      "grad_norm": 1.087207555770874,
+      "learning_rate": 3.933501846281267e-05,
+      "loss": 0.6495,
+      "step": 59
+    },
+    {
+      "epoch": 0.5587892898719441,
+      "grad_norm": 1.0766098499298096,
+      "learning_rate": 3.772572564296005e-05,
+      "loss": 0.6144,
+      "step": 60
+    },
+    {
+      "epoch": 0.5681024447031432,
+      "grad_norm": 1.2028203010559082,
+      "learning_rate": 3.612985456190778e-05,
+      "loss": 0.6065,
+      "step": 61
+    },
+    {
+      "epoch": 0.5774155995343423,
+      "grad_norm": 1.1321920156478882,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 0.5828,
+      "step": 62
+    },
+    {
+      "epoch": 0.5867287543655413,
+      "grad_norm": 1.5199047327041626,
+      "learning_rate": 3.298534127791785e-05,
+      "loss": 0.6298,
+      "step": 63
+    },
+    {
+      "epoch": 0.5960419091967404,
+      "grad_norm": 1.229564905166626,
+      "learning_rate": 3.144013755408895e-05,
+      "loss": 0.5927,
+      "step": 64
+    },
+    {
+      "epoch": 0.6053550640279395,
+      "grad_norm": 0.8759309649467468,
+      "learning_rate": 2.991522876735154e-05,
+      "loss": 0.5443,
+      "step": 65
+    },
+    {
+      "epoch": 0.6146682188591386,
+      "grad_norm": 0.7552090287208557,
+      "learning_rate": 2.8412282383075363e-05,
+      "loss": 0.5487,
+      "step": 66
+    },
+    {
+      "epoch": 0.6239813736903376,
+      "grad_norm": 0.9531745314598083,
+      "learning_rate": 2.693294185106562e-05,
+      "loss": 0.5292,
+      "step": 67
+    },
+    {
+      "epoch": 0.6332945285215367,
+      "grad_norm": 1.0337250232696533,
+      "learning_rate": 2.547882480847461e-05,
+      "loss": 0.5255,
+      "step": 68
+    },
+    {
+      "epoch": 0.6426076833527358,
+      "grad_norm": 0.9133326411247253,
+      "learning_rate": 2.405152131093926e-05,
+      "loss": 0.552,
+      "step": 69
+    },
+    {
+      "epoch": 0.6519208381839348,
+      "grad_norm": 0.99112468957901,
+      "learning_rate": 2.2652592093878666e-05,
+      "loss": 0.5296,
+      "step": 70
+    },
+    {
+      "epoch": 0.6612339930151339,
+      "grad_norm": 1.1152122020721436,
+      "learning_rate": 2.128356686585282e-05,
+      "loss": 0.5799,
+      "step": 71
+    },
+    {
+      "epoch": 0.670547147846333,
+      "grad_norm": 0.9779994487762451,
+      "learning_rate": 1.9945942635848748e-05,
+      "loss": 0.5775,
+      "step": 72
+    },
+    {
+      "epoch": 0.6798603026775321,
+      "grad_norm": 0.9947242736816406,
+      "learning_rate": 1.8641182076323148e-05,
+      "loss": 0.5926,
+      "step": 73
+    },
+    {
+      "epoch": 0.689173457508731,
+      "grad_norm": 1.2468385696411133,
+      "learning_rate": 1.7370711923791567e-05,
+      "loss": 0.5915,
+      "step": 74
+    },
+    {
+      "epoch": 0.6984866123399301,
+      "grad_norm": 1.1480826139450073,
+      "learning_rate": 1.6135921418712956e-05,
+      "loss": 0.5552,
+      "step": 75
+    },
+    {
+      "epoch": 0.6984866123399301,
+      "eval_loss": 0.587303638458252,
+      "eval_runtime": 16.8051,
+      "eval_samples_per_second": 86.105,
+      "eval_steps_per_second": 5.415,
+      "step": 75
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.768391340490752e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null