Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e766684d1813b64c196dcbaddf4024c89c40d8e33679927ed128fb9588d0f99
 size 2269195160

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1ac29f7fbd6e649e6f248f714928a4196656f56d15fd27473f1478996f9f2f6
 size 2269195160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7701a700b89b14d24dab2fa39eb4c39dd1a289862ea3598ab989dd184942dcb1
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e88026866ae92dd4cc2c7cac64118f584c33430823037e307e74ff85dc2bb12
 size 335922386

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aeda752780d4d1e2c8cc72596036685baf31024a594da9ff5aea8a1a54a0f80c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b23184210b5274d29d7aab370a6df28ffac3ad6df598e0776930b01f3163f7d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f23e2214bcafb439ebc7528dcc283ef6218d509a276c0baff0743503ecbe3d92
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.007116087100906115,
   "eval_steps": 9,
-  "global_step": 75,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -604,6 +604,205 @@
       "learning_rate": 1.7860619515673033e-05,
       "loss": 1.4126,
       "step": 75
     }
   ],
   "logging_steps": 1,
@@ -618,12 +817,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.56415764660224e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.009488116134541487,
   "eval_steps": 9,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.7860619515673033e-05,
       "loss": 1.4126,
       "step": 75
+    },
+    {
+      "epoch": 0.00721096826225153,
+      "grad_norm": 0.9957802891731262,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 1.7137,
+      "step": 76
+    },
+    {
+      "epoch": 0.0073058494235969445,
+      "grad_norm": 0.8167088031768799,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 1.5226,
+      "step": 77
+    },
+    {
+      "epoch": 0.00740073058494236,
+      "grad_norm": 0.8567049503326416,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 1.5889,
+      "step": 78
+    },
+    {
+      "epoch": 0.007495611746287775,
+      "grad_norm": 0.8254848718643188,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 1.5097,
+      "step": 79
+    },
+    {
+      "epoch": 0.00759049290763319,
+      "grad_norm": 1.0071043968200684,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 1.7966,
+      "step": 80
+    },
+    {
+      "epoch": 0.007685374068978604,
+      "grad_norm": 0.8706706166267395,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 1.6111,
+      "step": 81
+    },
+    {
+      "epoch": 0.007685374068978604,
+      "eval_loss": 1.5721654891967773,
+      "eval_runtime": 1086.0032,
+      "eval_samples_per_second": 8.173,
+      "eval_steps_per_second": 1.022,
+      "step": 81
+    },
+    {
+      "epoch": 0.007780255230324019,
+      "grad_norm": 0.9305638074874878,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.6522,
+      "step": 82
+    },
+    {
+      "epoch": 0.007875136391669434,
+      "grad_norm": 0.7519930601119995,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 1.4109,
+      "step": 83
+    },
+    {
+      "epoch": 0.007970017553014849,
+      "grad_norm": 0.8699010610580444,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 1.7363,
+      "step": 84
+    },
+    {
+      "epoch": 0.008064898714360265,
+      "grad_norm": 0.8968572020530701,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 1.6637,
+      "step": 85
+    },
+    {
+      "epoch": 0.008159779875705679,
+      "grad_norm": 0.8648973703384399,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 1.5079,
+      "step": 86
+    },
+    {
+      "epoch": 0.008254661037051093,
+      "grad_norm": 0.8895729780197144,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 1.5252,
+      "step": 87
+    },
+    {
+      "epoch": 0.008349542198396509,
+      "grad_norm": 0.8378965854644775,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 1.7974,
+      "step": 88
+    },
+    {
+      "epoch": 0.008444423359741923,
+      "grad_norm": 0.8197259306907654,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 1.7134,
+      "step": 89
+    },
+    {
+      "epoch": 0.008539304521087338,
+      "grad_norm": 0.8713379502296448,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 1.457,
+      "step": 90
+    },
+    {
+      "epoch": 0.008539304521087338,
+      "eval_loss": 1.570660948753357,
+      "eval_runtime": 1085.8766,
+      "eval_samples_per_second": 8.174,
+      "eval_steps_per_second": 1.022,
+      "step": 90
+    },
+    {
+      "epoch": 0.008634185682432753,
+      "grad_norm": 0.8578446507453918,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.4388,
+      "step": 91
+    },
+    {
+      "epoch": 0.008729066843778168,
+      "grad_norm": 0.9030793309211731,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 1.5076,
+      "step": 92
+    },
+    {
+      "epoch": 0.008823948005123582,
+      "grad_norm": 0.898984432220459,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 1.3373,
+      "step": 93
+    },
+    {
+      "epoch": 0.008918829166468998,
+      "grad_norm": 0.8677286505699158,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 1.4457,
+      "step": 94
+    },
+    {
+      "epoch": 0.009013710327814412,
+      "grad_norm": 0.8072497844696045,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 1.2907,
+      "step": 95
+    },
+    {
+      "epoch": 0.009108591489159828,
+      "grad_norm": 0.9417414665222168,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 1.5188,
+      "step": 96
+    },
+    {
+      "epoch": 0.009203472650505242,
+      "grad_norm": 0.8864390254020691,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 1.6006,
+      "step": 97
+    },
+    {
+      "epoch": 0.009298353811850657,
+      "grad_norm": 0.8669371604919434,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.5055,
+      "step": 98
+    },
+    {
+      "epoch": 0.009393234973196073,
+      "grad_norm": 0.8430877327919006,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 1.352,
+      "step": 99
+    },
+    {
+      "epoch": 0.009393234973196073,
+      "eval_loss": 1.5703389644622803,
+      "eval_runtime": 1085.4904,
+      "eval_samples_per_second": 8.177,
+      "eval_steps_per_second": 1.023,
+      "step": 99
+    },
+    {
+      "epoch": 0.009488116134541487,
+      "grad_norm": 0.8918697834014893,
+      "learning_rate": 0.0,
+      "loss": 1.4941,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.41887686213632e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null