Training in progress, step 65, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:929a8af8e27f62639075bdeea688a46e24e34fc94e58fa753777b1dd88591817
 size 97728

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ac43bcbe62cf88428bd76ef9f660ffb66c6bc6f882c810450c99bd197abd87c
 size 97728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79edf61b4fe2c56808485160c943c488095f3d4171f1a7d68467fcfb8ec251d8
 size 212298

 version https://git-lfs.github.com/spec/v1
+oid sha256:92eb6bc2b8af1737fa1071532157705b44be4657790c76faa4fc15523463b798
 size 212298

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad1349adb94f706f06429ed068c830a154214fce15b2a1aca2b3e8078b4e60e
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f40dc00d91fa62ca20098eb648bf83dd83d95b4d9d3e2628f168d70bedc5489
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10812802173bdc9f372d285a96389fc081fc7d318ad29ad1832d8cbe9dc0b4fb
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9352282697b9dcfccaddcaa2a800605fcf6a10ce693840e216a2e035ab6f3957
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ce6bb83e23e624faa7d4d0f0d916aa2bfb398e54c45dc8ab27f6276d8d4836b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:f66f042c25e6cbde339a954d339d420fc0a7c2a999c2e8ba7f52aaa48d6be7ba
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d3d30cece1e3ffca9dbba541c8e005ad3300bb9a15b32d293b6da038ae185ae
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:749821883151fb8dc03df28a34be7953b04551124e87e16ad22ffa3877dc737f
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77a3132902dff05847d42068955a2890663d28a5047f2e7ac187fce7795a5d4b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9d60776992f1107ed357b048c7d4ed0a296c5ebe751022b707db636852f033a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 10.37548828125,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 2.3391812865497075,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,111 @@
       "eval_samples_per_second": 660.475,
       "eval_steps_per_second": 171.723,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -404,12 +509,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 42768059596800.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 10.37548828125,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 3.0409356725146197,
   "eval_steps": 25,
+  "global_step": 65,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 660.475,
       "eval_steps_per_second": 171.723,
       "step": 50
+    },
+    {
+      "epoch": 2.3859649122807016,
+      "grad_norm": 0.1374143660068512,
+      "learning_rate": 2.0855884478824412e-05,
+      "loss": 10.6913,
+      "step": 51
+    },
+    {
+      "epoch": 2.4327485380116958,
+      "grad_norm": 0.13993822038173676,
+      "learning_rate": 1.9415091837803573e-05,
+      "loss": 9.6339,
+      "step": 52
+    },
+    {
+      "epoch": 2.47953216374269,
+      "grad_norm": 0.1723541021347046,
+      "learning_rate": 1.806564514567258e-05,
+      "loss": 11.5109,
+      "step": 53
+    },
+    {
+      "epoch": 2.526315789473684,
+      "grad_norm": 0.13717088103294373,
+      "learning_rate": 1.6811008412736208e-05,
+      "loss": 9.4536,
+      "step": 54
+    },
+    {
+      "epoch": 2.573099415204678,
+      "grad_norm": 0.13807837665081024,
+      "learning_rate": 1.5654402273493805e-05,
+      "loss": 10.6052,
+      "step": 55
+    },
+    {
+      "epoch": 2.6198830409356724,
+      "grad_norm": 0.13864926993846893,
+      "learning_rate": 1.459879571931663e-05,
+      "loss": 10.2006,
+      "step": 56
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.14294551312923431,
+      "learning_rate": 1.3646898477089626e-05,
+      "loss": 10.4874,
+      "step": 57
+    },
+    {
+      "epoch": 2.7134502923976607,
+      "grad_norm": 0.16176921129226685,
+      "learning_rate": 1.2801154053381386e-05,
+      "loss": 11.0147,
+      "step": 58
+    },
+    {
+      "epoch": 2.760233918128655,
+      "grad_norm": 0.13276654481887817,
+      "learning_rate": 1.2063733461997805e-05,
+      "loss": 9.6744,
+      "step": 59
+    },
+    {
+      "epoch": 2.807017543859649,
+      "grad_norm": 0.14666683971881866,
+      "learning_rate": 1.1436529651020813e-05,
+      "loss": 10.4008,
+      "step": 60
+    },
+    {
+      "epoch": 2.853801169590643,
+      "grad_norm": 0.1457078456878662,
+      "learning_rate": 1.092115264363775e-05,
+      "loss": 10.5984,
+      "step": 61
+    },
+    {
+      "epoch": 2.9005847953216373,
+      "grad_norm": 0.13246245682239532,
+      "learning_rate": 1.0518925405234989e-05,
+      "loss": 9.8514,
+      "step": 62
+    },
+    {
+      "epoch": 2.9473684210526314,
+      "grad_norm": 0.14585375785827637,
+      "learning_rate": 1.023088044736472e-05,
+      "loss": 10.7322,
+      "step": 63
+    },
+    {
+      "epoch": 2.9941520467836256,
+      "grad_norm": 0.1970202624797821,
+      "learning_rate": 1.0057757177302627e-05,
+      "loss": 14.7962,
+      "step": 64
+    },
+    {
+      "epoch": 3.0409356725146197,
+      "grad_norm": 0.18727658689022064,
+      "learning_rate": 1e-05,
+      "loss": 12.5777,
+      "step": 65
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 55598477475840.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null