Training in progress, step 100, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +354 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df5e8f19285789390f518cd5c2ee6d18c6b4079c83217593f13cb6312b900641
 size 956362232

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b6cd1a6cad6794e059cf43e749927da0aee5a92493c3b7658ede9969f510853
 size 956362232

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe3c1155940699321c9319f31ff481eb14d2a068276d35c6ad4bd1d8ece41f38
 size 486148756

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0e28ae24814d7cb3e928f6f1b556bc26177ed0db6e0d35231ce41e0886afa5a
 size 486148756

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8eccac390378b22aeb148c260b00bec01d948946d8363d5282899af673e0e86
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.03790570954750059,
   "eval_steps": 500,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -357,6 +357,356 @@
       "learning_rate": 0.00011736481776669306,
       "loss": 0.116,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -371,12 +721,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.812811709692314e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07581141909500118,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00011736481776669306,
       "loss": 0.116,
       "step": 50
+    },
+    {
+      "epoch": 0.038663823738450606,
+      "grad_norm": 0.0628269761800766,
+      "learning_rate": 0.00011391731009600654,
+      "loss": 0.1068,
+      "step": 51
+    },
+    {
+      "epoch": 0.039421937929400616,
+      "grad_norm": 0.07782096415758133,
+      "learning_rate": 0.00011045284632676536,
+      "loss": 0.1124,
+      "step": 52
+    },
+    {
+      "epoch": 0.040180052120350626,
+      "grad_norm": 0.07580537348985672,
+      "learning_rate": 0.00010697564737441252,
+      "loss": 0.1038,
+      "step": 53
+    },
+    {
+      "epoch": 0.04093816631130064,
+      "grad_norm": 0.05523601919412613,
+      "learning_rate": 0.00010348994967025012,
+      "loss": 0.1,
+      "step": 54
+    },
+    {
+      "epoch": 0.04169628050225065,
+      "grad_norm": 0.056455422192811966,
+      "learning_rate": 0.0001,
+      "loss": 0.1046,
+      "step": 55
+    },
+    {
+      "epoch": 0.04245439469320066,
+      "grad_norm": 0.052510254085063934,
+      "learning_rate": 9.651005032974994e-05,
+      "loss": 0.1025,
+      "step": 56
+    },
+    {
+      "epoch": 0.04321250888415067,
+      "grad_norm": 0.053201328963041306,
+      "learning_rate": 9.302435262558747e-05,
+      "loss": 0.0956,
+      "step": 57
+    },
+    {
+      "epoch": 0.04397062307510069,
+      "grad_norm": 0.05429815128445625,
+      "learning_rate": 8.954715367323468e-05,
+      "loss": 0.0915,
+      "step": 58
+    },
+    {
+      "epoch": 0.0447287372660507,
+      "grad_norm": 0.05565072223544121,
+      "learning_rate": 8.608268990399349e-05,
+      "loss": 0.1037,
+      "step": 59
+    },
+    {
+      "epoch": 0.04548685145700071,
+      "grad_norm": 0.06064201518893242,
+      "learning_rate": 8.263518223330697e-05,
+      "loss": 0.0996,
+      "step": 60
+    },
+    {
+      "epoch": 0.04624496564795072,
+      "grad_norm": 0.053189653903245926,
+      "learning_rate": 7.920883091822408e-05,
+      "loss": 0.0972,
+      "step": 61
+    },
+    {
+      "epoch": 0.04700307983890074,
+      "grad_norm": 0.0630035251379013,
+      "learning_rate": 7.580781044003324e-05,
+      "loss": 0.0957,
+      "step": 62
+    },
+    {
+      "epoch": 0.04776119402985075,
+      "grad_norm": 0.05781060457229614,
+      "learning_rate": 7.243626441830009e-05,
+      "loss": 0.1116,
+      "step": 63
+    },
+    {
+      "epoch": 0.04851930822080076,
+      "grad_norm": 0.05752211809158325,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 0.1015,
+      "step": 64
+    },
+    {
+      "epoch": 0.04927742241175077,
+      "grad_norm": 0.05063620209693909,
+      "learning_rate": 6.579798566743314e-05,
+      "loss": 0.1083,
+      "step": 65
+    },
+    {
+      "epoch": 0.050035536602700784,
+      "grad_norm": 0.0517071969807148,
+      "learning_rate": 6.25393406584088e-05,
+      "loss": 0.0946,
+      "step": 66
+    },
+    {
+      "epoch": 0.050793650793650794,
+      "grad_norm": 0.04985162615776062,
+      "learning_rate": 5.9326335692419995e-05,
+      "loss": 0.1058,
+      "step": 67
+    },
+    {
+      "epoch": 0.051551764984600804,
+      "grad_norm": 0.04708476364612579,
+      "learning_rate": 5.616288532109225e-05,
+      "loss": 0.0921,
+      "step": 68
+    },
+    {
+      "epoch": 0.05230987917555082,
+      "grad_norm": 0.05587763711810112,
+      "learning_rate": 5.305284372141095e-05,
+      "loss": 0.0927,
+      "step": 69
+    },
+    {
+      "epoch": 0.05306799336650083,
+      "grad_norm": 0.05187711864709854,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.1022,
+      "step": 70
+    },
+    {
+      "epoch": 0.05382610755745084,
+      "grad_norm": 0.05270811542868614,
+      "learning_rate": 4.700807357667952e-05,
+      "loss": 0.1022,
+      "step": 71
+    },
+    {
+      "epoch": 0.05458422174840085,
+      "grad_norm": 0.050687965005636215,
+      "learning_rate": 4.4080709652925336e-05,
+      "loss": 0.0999,
+      "step": 72
+    },
+    {
+      "epoch": 0.05534233593935087,
+      "grad_norm": 0.04412781447172165,
+      "learning_rate": 4.12214747707527e-05,
+      "loss": 0.0989,
+      "step": 73
+    },
+    {
+      "epoch": 0.05610045013030088,
+      "grad_norm": 0.04820292443037033,
+      "learning_rate": 3.843385246743417e-05,
+      "loss": 0.0882,
+      "step": 74
+    },
+    {
+      "epoch": 0.05685856432125089,
+      "grad_norm": 0.05101883411407471,
+      "learning_rate": 3.5721239031346066e-05,
+      "loss": 0.0896,
+      "step": 75
+    },
+    {
+      "epoch": 0.0576166785122009,
+      "grad_norm": 0.056556086987257004,
+      "learning_rate": 3.308693936411421e-05,
+      "loss": 0.0951,
+      "step": 76
+    },
+    {
+      "epoch": 0.058374792703150914,
+      "grad_norm": 0.05035807937383652,
+      "learning_rate": 3.053416295410026e-05,
+      "loss": 0.0911,
+      "step": 77
+    },
+    {
+      "epoch": 0.059132906894100924,
+      "grad_norm": 0.043333761394023895,
+      "learning_rate": 2.8066019966134904e-05,
+      "loss": 0.089,
+      "step": 78
+    },
+    {
+      "epoch": 0.059891021085050934,
+      "grad_norm": 0.047586455941200256,
+      "learning_rate": 2.5685517452260567e-05,
+      "loss": 0.0874,
+      "step": 79
+    },
+    {
+      "epoch": 0.06064913527600095,
+      "grad_norm": 0.0488692931830883,
+      "learning_rate": 2.339555568810221e-05,
+      "loss": 0.0925,
+      "step": 80
+    },
+    {
+      "epoch": 0.06140724946695096,
+      "grad_norm": 0.05427223816514015,
+      "learning_rate": 2.119892463932781e-05,
+      "loss": 0.0927,
+      "step": 81
+    },
+    {
+      "epoch": 0.06216536365790097,
+      "grad_norm": 0.057675670832395554,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.0978,
+      "step": 82
+    },
+    {
+      "epoch": 0.06292347784885098,
+      "grad_norm": 0.05663346126675606,
+      "learning_rate": 1.7096242744495837e-05,
+      "loss": 0.1041,
+      "step": 83
+    },
+    {
+      "epoch": 0.06368159203980099,
+      "grad_norm": 0.054945193231105804,
+      "learning_rate": 1.5195190384357404e-05,
+      "loss": 0.0847,
+      "step": 84
+    },
+    {
+      "epoch": 0.064439706230751,
+      "grad_norm": 0.05341991409659386,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.0877,
+      "step": 85
+    },
+    {
+      "epoch": 0.06519782042170102,
+      "grad_norm": 0.0533662885427475,
+      "learning_rate": 1.1705240714107302e-05,
+      "loss": 0.1052,
+      "step": 86
+    },
+    {
+      "epoch": 0.06595593461265103,
+      "grad_norm": 0.045474544167518616,
+      "learning_rate": 1.0120595370083318e-05,
+      "loss": 0.0863,
+      "step": 87
+    },
+    {
+      "epoch": 0.06671404880360104,
+      "grad_norm": 0.043375492095947266,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.0904,
+      "step": 88
+    },
+    {
+      "epoch": 0.06747216299455105,
+      "grad_norm": 0.05601764842867851,
+      "learning_rate": 7.281614543321269e-06,
+      "loss": 0.094,
+      "step": 89
+    },
+    {
+      "epoch": 0.06823027718550106,
+      "grad_norm": 0.0548299178481102,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.0857,
+      "step": 90
+    },
+    {
+      "epoch": 0.06898839137645107,
+      "grad_norm": 0.04874192178249359,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 0.0885,
+      "step": 91
+    },
+    {
+      "epoch": 0.06974650556740108,
+      "grad_norm": 0.05287107825279236,
+      "learning_rate": 3.873830406168111e-06,
+      "loss": 0.1027,
+      "step": 92
+    },
+    {
+      "epoch": 0.0705046197583511,
+      "grad_norm": 0.04792382940649986,
+      "learning_rate": 2.970427372400353e-06,
+      "loss": 0.0849,
+      "step": 93
+    },
+    {
+      "epoch": 0.07126273394930112,
+      "grad_norm": 0.04913964122533798,
+      "learning_rate": 2.1852399266194314e-06,
+      "loss": 0.0848,
+      "step": 94
+    },
+    {
+      "epoch": 0.07202084814025113,
+      "grad_norm": 0.04982119798660278,
+      "learning_rate": 1.5192246987791981e-06,
+      "loss": 0.0959,
+      "step": 95
+    },
+    {
+      "epoch": 0.07277896233120114,
+      "grad_norm": 0.04819797724485397,
+      "learning_rate": 9.731931258429638e-07,
+      "loss": 0.0881,
+      "step": 96
+    },
+    {
+      "epoch": 0.07353707652215115,
+      "grad_norm": 0.050941213965415955,
+      "learning_rate": 5.478104631726711e-07,
+      "loss": 0.0957,
+      "step": 97
+    },
+    {
+      "epoch": 0.07429519071310116,
+      "grad_norm": 0.046391792595386505,
+      "learning_rate": 2.4359497401758024e-07,
+      "loss": 0.0859,
+      "step": 98
+    },
+    {
+      "epoch": 0.07505330490405117,
+      "grad_norm": 0.0439014732837677,
+      "learning_rate": 6.09172980904238e-08,
+      "loss": 0.0963,
+      "step": 99
+    },
+    {
+      "epoch": 0.07581141909500118,
+      "grad_norm": 0.04594559594988823,
+      "learning_rate": 0.0,
+      "loss": 0.091,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.162003273287598e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null