Training in progress, step 100, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +354 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:562229773e7e0556cb22e660009df86de5884fa01ab488d6b685ca045cc38c55
 size 3653851880

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c9b2364be7f3c84d373b7ef1118db06fe33dc81a615f30a3fa612077f33a019
 size 3653851880

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:307c0fc0d05675424264643bb30dbc04ee107ead7619984d76afdf298a3c122c
 size 1856183844

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe079ab42c71d5ee681ef764bdaa65934d9b6485a23efa69d07b25da6a845155
 size 1856183844

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8eccac390378b22aeb148c260b00bec01d948946d8363d5282899af673e0e86
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01895330379776825,
   "eval_steps": 500,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -357,6 +357,356 @@
       "learning_rate": 0.00011736481776669306,
       "loss": 0.0993,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -371,12 +721,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.9437929254121472e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0379066075955365,
   "eval_steps": 500,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00011736481776669306,
       "loss": 0.0993,
       "step": 50
+    },
+    {
+      "epoch": 0.019332369873723613,
+      "grad_norm": 0.06991136819124222,
+      "learning_rate": 0.00011391731009600654,
+      "loss": 0.0873,
+      "step": 51
+    },
+    {
+      "epoch": 0.019711435949678977,
+      "grad_norm": 0.06695719808340073,
+      "learning_rate": 0.00011045284632676536,
+      "loss": 0.0876,
+      "step": 52
+    },
+    {
+      "epoch": 0.020090502025634345,
+      "grad_norm": 0.07722793519496918,
+      "learning_rate": 0.00010697564737441252,
+      "loss": 0.0977,
+      "step": 53
+    },
+    {
+      "epoch": 0.02046956810158971,
+      "grad_norm": 0.06521150469779968,
+      "learning_rate": 0.00010348994967025012,
+      "loss": 0.0856,
+      "step": 54
+    },
+    {
+      "epoch": 0.020848634177545074,
+      "grad_norm": 0.07508374005556107,
+      "learning_rate": 0.0001,
+      "loss": 0.0876,
+      "step": 55
+    },
+    {
+      "epoch": 0.021227700253500438,
+      "grad_norm": 0.05650763958692551,
+      "learning_rate": 9.651005032974994e-05,
+      "loss": 0.0837,
+      "step": 56
+    },
+    {
+      "epoch": 0.021606766329455802,
+      "grad_norm": 0.07390966266393661,
+      "learning_rate": 9.302435262558747e-05,
+      "loss": 0.0879,
+      "step": 57
+    },
+    {
+      "epoch": 0.021985832405411167,
+      "grad_norm": 0.058437567204236984,
+      "learning_rate": 8.954715367323468e-05,
+      "loss": 0.0773,
+      "step": 58
+    },
+    {
+      "epoch": 0.022364898481366535,
+      "grad_norm": 0.06248459964990616,
+      "learning_rate": 8.608268990399349e-05,
+      "loss": 0.0788,
+      "step": 59
+    },
+    {
+      "epoch": 0.0227439645573219,
+      "grad_norm": 0.0653211697936058,
+      "learning_rate": 8.263518223330697e-05,
+      "loss": 0.0855,
+      "step": 60
+    },
+    {
+      "epoch": 0.023123030633277263,
+      "grad_norm": 0.062243081629276276,
+      "learning_rate": 7.920883091822408e-05,
+      "loss": 0.0886,
+      "step": 61
+    },
+    {
+      "epoch": 0.023502096709232628,
+      "grad_norm": 0.055676043033599854,
+      "learning_rate": 7.580781044003324e-05,
+      "loss": 0.0897,
+      "step": 62
+    },
+    {
+      "epoch": 0.023881162785187992,
+      "grad_norm": 0.05823361128568649,
+      "learning_rate": 7.243626441830009e-05,
+      "loss": 0.0753,
+      "step": 63
+    },
+    {
+      "epoch": 0.02426022886114336,
+      "grad_norm": 0.047967329621315,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 0.0804,
+      "step": 64
+    },
+    {
+      "epoch": 0.024639294937098724,
+      "grad_norm": 0.04651477187871933,
+      "learning_rate": 6.579798566743314e-05,
+      "loss": 0.0737,
+      "step": 65
+    },
+    {
+      "epoch": 0.02501836101305409,
+      "grad_norm": 0.06440019607543945,
+      "learning_rate": 6.25393406584088e-05,
+      "loss": 0.0931,
+      "step": 66
+    },
+    {
+      "epoch": 0.025397427089009453,
+      "grad_norm": 0.0673041045665741,
+      "learning_rate": 5.9326335692419995e-05,
+      "loss": 0.0926,
+      "step": 67
+    },
+    {
+      "epoch": 0.025776493164964817,
+      "grad_norm": 0.06460334360599518,
+      "learning_rate": 5.616288532109225e-05,
+      "loss": 0.1055,
+      "step": 68
+    },
+    {
+      "epoch": 0.02615555924092018,
+      "grad_norm": 0.06001191958785057,
+      "learning_rate": 5.305284372141095e-05,
+      "loss": 0.0815,
+      "step": 69
+    },
+    {
+      "epoch": 0.02653462531687555,
+      "grad_norm": 0.05296599864959717,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.0837,
+      "step": 70
+    },
+    {
+      "epoch": 0.026913691392830914,
+      "grad_norm": 0.0675460696220398,
+      "learning_rate": 4.700807357667952e-05,
+      "loss": 0.0848,
+      "step": 71
+    },
+    {
+      "epoch": 0.027292757468786278,
+      "grad_norm": 0.05409557744860649,
+      "learning_rate": 4.4080709652925336e-05,
+      "loss": 0.0779,
+      "step": 72
+    },
+    {
+      "epoch": 0.027671823544741642,
+      "grad_norm": 0.05015251785516739,
+      "learning_rate": 4.12214747707527e-05,
+      "loss": 0.0788,
+      "step": 73
+    },
+    {
+      "epoch": 0.028050889620697007,
+      "grad_norm": 0.05301973596215248,
+      "learning_rate": 3.843385246743417e-05,
+      "loss": 0.0764,
+      "step": 74
+    },
+    {
+      "epoch": 0.02842995569665237,
+      "grad_norm": 0.05640785023570061,
+      "learning_rate": 3.5721239031346066e-05,
+      "loss": 0.0867,
+      "step": 75
+    },
+    {
+      "epoch": 0.02880902177260774,
+      "grad_norm": 0.055719390511512756,
+      "learning_rate": 3.308693936411421e-05,
+      "loss": 0.0788,
+      "step": 76
+    },
+    {
+      "epoch": 0.029188087848563103,
+      "grad_norm": 0.06163398548960686,
+      "learning_rate": 3.053416295410026e-05,
+      "loss": 0.0807,
+      "step": 77
+    },
+    {
+      "epoch": 0.029567153924518468,
+      "grad_norm": 0.06289924681186676,
+      "learning_rate": 2.8066019966134904e-05,
+      "loss": 0.0808,
+      "step": 78
+    },
+    {
+      "epoch": 0.029946220000473832,
+      "grad_norm": 0.056900832802057266,
+      "learning_rate": 2.5685517452260567e-05,
+      "loss": 0.0762,
+      "step": 79
+    },
+    {
+      "epoch": 0.030325286076429196,
+      "grad_norm": 0.06545019149780273,
+      "learning_rate": 2.339555568810221e-05,
+      "loss": 0.0929,
+      "step": 80
+    },
+    {
+      "epoch": 0.030704352152384564,
+      "grad_norm": 0.060016706585884094,
+      "learning_rate": 2.119892463932781e-05,
+      "loss": 0.0752,
+      "step": 81
+    },
+    {
+      "epoch": 0.03108341822833993,
+      "grad_norm": 0.055878885090351105,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 0.0753,
+      "step": 82
+    },
+    {
+      "epoch": 0.03146248430429529,
+      "grad_norm": 0.06015196815133095,
+      "learning_rate": 1.7096242744495837e-05,
+      "loss": 0.086,
+      "step": 83
+    },
+    {
+      "epoch": 0.03184155038025066,
+      "grad_norm": 0.05178290605545044,
+      "learning_rate": 1.5195190384357404e-05,
+      "loss": 0.0793,
+      "step": 84
+    },
+    {
+      "epoch": 0.032220616456206025,
+      "grad_norm": 0.05161258578300476,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.0809,
+      "step": 85
+    },
+    {
+      "epoch": 0.03259968253216139,
+      "grad_norm": 0.05504770204424858,
+      "learning_rate": 1.1705240714107302e-05,
+      "loss": 0.0772,
+      "step": 86
+    },
+    {
+      "epoch": 0.032978748608116754,
+      "grad_norm": 0.05566380172967911,
+      "learning_rate": 1.0120595370083318e-05,
+      "loss": 0.0726,
+      "step": 87
+    },
+    {
+      "epoch": 0.03335781468407212,
+      "grad_norm": 0.05389956384897232,
+      "learning_rate": 8.645454235739903e-06,
+      "loss": 0.0779,
+      "step": 88
+    },
+    {
+      "epoch": 0.03373688076002748,
+      "grad_norm": 0.05445749685168266,
+      "learning_rate": 7.281614543321269e-06,
+      "loss": 0.076,
+      "step": 89
+    },
+    {
+      "epoch": 0.03411594683598285,
+      "grad_norm": 0.04557236656546593,
+      "learning_rate": 6.030737921409169e-06,
+      "loss": 0.075,
+      "step": 90
+    },
+    {
+      "epoch": 0.03449501291193821,
+      "grad_norm": 0.050466809421777725,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 0.0747,
+      "step": 91
+    },
+    {
+      "epoch": 0.034874078987893575,
+      "grad_norm": 0.04975885897874832,
+      "learning_rate": 3.873830406168111e-06,
+      "loss": 0.0746,
+      "step": 92
+    },
+    {
+      "epoch": 0.03525314506384894,
+      "grad_norm": 0.055442556738853455,
+      "learning_rate": 2.970427372400353e-06,
+      "loss": 0.0807,
+      "step": 93
+    },
+    {
+      "epoch": 0.035632211139804304,
+      "grad_norm": 0.05043969675898552,
+      "learning_rate": 2.1852399266194314e-06,
+      "loss": 0.0741,
+      "step": 94
+    },
+    {
+      "epoch": 0.036011277215759675,
+      "grad_norm": 0.05411124229431152,
+      "learning_rate": 1.5192246987791981e-06,
+      "loss": 0.0741,
+      "step": 95
+    },
+    {
+      "epoch": 0.03639034329171504,
+      "grad_norm": 0.06104011833667755,
+      "learning_rate": 9.731931258429638e-07,
+      "loss": 0.0822,
+      "step": 96
+    },
+    {
+      "epoch": 0.036769409367670404,
+      "grad_norm": 0.05103430524468422,
+      "learning_rate": 5.478104631726711e-07,
+      "loss": 0.0716,
+      "step": 97
+    },
+    {
+      "epoch": 0.03714847544362577,
+      "grad_norm": 0.057701822370290756,
+      "learning_rate": 2.4359497401758024e-07,
+      "loss": 0.0767,
+      "step": 98
+    },
+    {
+      "epoch": 0.03752754151958113,
+      "grad_norm": 0.05738105624914169,
+      "learning_rate": 6.09172980904238e-08,
+      "loss": 0.0837,
+      "step": 99
+    },
+    {
+      "epoch": 0.0379066075955365,
+      "grad_norm": 0.05406734347343445,
+      "learning_rate": 0.0,
+      "loss": 0.0856,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.888051858648392e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null