kooff11's picture
Training in progress, step 36, checkpoint
0dd45cd verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0139372822299653,
"eval_steps": 9,
"global_step": 36,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.027874564459930314,
"grad_norm": 1.528730034828186,
"learning_rate": 5e-05,
"loss": 5.1122,
"step": 1
},
{
"epoch": 0.027874564459930314,
"eval_loss": 4.7186970710754395,
"eval_runtime": 1.266,
"eval_samples_per_second": 191.16,
"eval_steps_per_second": 48.185,
"step": 1
},
{
"epoch": 0.05574912891986063,
"grad_norm": 1.4118348360061646,
"learning_rate": 0.0001,
"loss": 4.8444,
"step": 2
},
{
"epoch": 0.08362369337979095,
"grad_norm": 1.4177184104919434,
"learning_rate": 9.978670881475172e-05,
"loss": 5.0323,
"step": 3
},
{
"epoch": 0.11149825783972125,
"grad_norm": 1.3121063709259033,
"learning_rate": 9.91486549841951e-05,
"loss": 4.9379,
"step": 4
},
{
"epoch": 0.13937282229965156,
"grad_norm": 1.1417440176010132,
"learning_rate": 9.809128215864097e-05,
"loss": 4.9118,
"step": 5
},
{
"epoch": 0.1672473867595819,
"grad_norm": 1.0766890048980713,
"learning_rate": 9.662361147021779e-05,
"loss": 4.8126,
"step": 6
},
{
"epoch": 0.1951219512195122,
"grad_norm": 1.0330865383148193,
"learning_rate": 9.475816456775313e-05,
"loss": 5.0199,
"step": 7
},
{
"epoch": 0.2229965156794425,
"grad_norm": 0.8478246331214905,
"learning_rate": 9.251085678648072e-05,
"loss": 4.7868,
"step": 8
},
{
"epoch": 0.2508710801393728,
"grad_norm": 0.7461872100830078,
"learning_rate": 8.9900861364012e-05,
"loss": 4.7503,
"step": 9
},
{
"epoch": 0.2508710801393728,
"eval_loss": 4.568607330322266,
"eval_runtime": 1.2588,
"eval_samples_per_second": 192.254,
"eval_steps_per_second": 48.461,
"step": 9
},
{
"epoch": 0.2787456445993031,
"grad_norm": 0.7740032076835632,
"learning_rate": 8.695044586103296e-05,
"loss": 4.6227,
"step": 10
},
{
"epoch": 0.30662020905923343,
"grad_norm": 0.7942211627960205,
"learning_rate": 8.368478218232787e-05,
"loss": 4.6837,
"step": 11
},
{
"epoch": 0.3344947735191638,
"grad_norm": 0.8654100894927979,
"learning_rate": 8.013173181896283e-05,
"loss": 4.6198,
"step": 12
},
{
"epoch": 0.3623693379790941,
"grad_norm": 0.9776397943496704,
"learning_rate": 7.63216081438678e-05,
"loss": 4.7598,
"step": 13
},
{
"epoch": 0.3902439024390244,
"grad_norm": 0.828058123588562,
"learning_rate": 7.228691778882693e-05,
"loss": 4.6796,
"step": 14
},
{
"epoch": 0.4181184668989547,
"grad_norm": 0.8717594146728516,
"learning_rate": 6.806208330935766e-05,
"loss": 4.5456,
"step": 15
},
{
"epoch": 0.445993031358885,
"grad_norm": 0.9016168117523193,
"learning_rate": 6.368314950360415e-05,
"loss": 4.7329,
"step": 16
},
{
"epoch": 0.4738675958188153,
"grad_norm": 0.9021468162536621,
"learning_rate": 5.918747589082853e-05,
"loss": 4.6807,
"step": 17
},
{
"epoch": 0.5017421602787456,
"grad_norm": 0.9048068523406982,
"learning_rate": 5.4613417973165106e-05,
"loss": 4.6001,
"step": 18
},
{
"epoch": 0.5017421602787456,
"eval_loss": 4.461349010467529,
"eval_runtime": 1.2645,
"eval_samples_per_second": 191.38,
"eval_steps_per_second": 48.24,
"step": 18
},
{
"epoch": 0.5296167247386759,
"grad_norm": 0.9348726868629456,
"learning_rate": 5e-05,
"loss": 4.7054,
"step": 19
},
{
"epoch": 0.5574912891986062,
"grad_norm": 0.8965240120887756,
"learning_rate": 4.5386582026834906e-05,
"loss": 4.7152,
"step": 20
},
{
"epoch": 0.5853658536585366,
"grad_norm": 0.8576871752738953,
"learning_rate": 4.0812524109171476e-05,
"loss": 4.629,
"step": 21
},
{
"epoch": 0.6132404181184669,
"grad_norm": 0.8105357885360718,
"learning_rate": 3.631685049639586e-05,
"loss": 4.6084,
"step": 22
},
{
"epoch": 0.6411149825783972,
"grad_norm": 0.7387930750846863,
"learning_rate": 3.1937916690642356e-05,
"loss": 4.5363,
"step": 23
},
{
"epoch": 0.6689895470383276,
"grad_norm": 0.8787197470664978,
"learning_rate": 2.771308221117309e-05,
"loss": 4.639,
"step": 24
},
{
"epoch": 0.6968641114982579,
"grad_norm": 0.8041818141937256,
"learning_rate": 2.3678391856132204e-05,
"loss": 4.583,
"step": 25
},
{
"epoch": 0.7247386759581882,
"grad_norm": 0.7042474746704102,
"learning_rate": 1.9868268181037185e-05,
"loss": 4.5931,
"step": 26
},
{
"epoch": 0.7526132404181185,
"grad_norm": 0.6868075132369995,
"learning_rate": 1.631521781767214e-05,
"loss": 4.5914,
"step": 27
},
{
"epoch": 0.7526132404181185,
"eval_loss": 4.399294376373291,
"eval_runtime": 1.2619,
"eval_samples_per_second": 191.767,
"eval_steps_per_second": 48.338,
"step": 27
},
{
"epoch": 0.7804878048780488,
"grad_norm": 0.6679636836051941,
"learning_rate": 1.3049554138967051e-05,
"loss": 4.5055,
"step": 28
},
{
"epoch": 0.8083623693379791,
"grad_norm": 0.7194696068763733,
"learning_rate": 1.0099138635988026e-05,
"loss": 4.5841,
"step": 29
},
{
"epoch": 0.8362369337979094,
"grad_norm": 0.7062932848930359,
"learning_rate": 7.489143213519301e-06,
"loss": 4.502,
"step": 30
},
{
"epoch": 0.8641114982578397,
"grad_norm": 0.7180935740470886,
"learning_rate": 5.241835432246889e-06,
"loss": 4.4559,
"step": 31
},
{
"epoch": 0.89198606271777,
"grad_norm": 0.702731192111969,
"learning_rate": 3.376388529782215e-06,
"loss": 4.6249,
"step": 32
},
{
"epoch": 0.9198606271777003,
"grad_norm": 0.7095968127250671,
"learning_rate": 1.908717841359048e-06,
"loss": 4.4653,
"step": 33
},
{
"epoch": 0.9477351916376306,
"grad_norm": 0.7153111696243286,
"learning_rate": 8.513450158049108e-07,
"loss": 4.542,
"step": 34
},
{
"epoch": 0.975609756097561,
"grad_norm": 0.7444437146186829,
"learning_rate": 2.1329118524827662e-07,
"loss": 4.5022,
"step": 35
},
{
"epoch": 1.0139372822299653,
"grad_norm": 1.0557011365890503,
"learning_rate": 0.0,
"loss": 6.8891,
"step": 36
},
{
"epoch": 1.0139372822299653,
"eval_loss": 4.388754367828369,
"eval_runtime": 1.2674,
"eval_samples_per_second": 190.94,
"eval_steps_per_second": 48.13,
"step": 36
}
],
"logging_steps": 1,
"max_steps": 36,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 9,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6149077112717312.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}