electra-finetuned / trainer_state.json
tejaskamtam's picture
End of training
97121e7 verified
{
"best_metric": 1.7163910865783691,
"best_model_checkpoint": "finetuning/output/electra-base-finetuned_xe_ey_fae/checkpoint-19000",
"epoch": 2.642433616911575,
"eval_steps": 500,
"global_step": 20500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 9.786027326630576e-06,
"loss": 2.5359,
"step": 500
},
{
"epoch": 0.06,
"eval_accuracy": 0.6227738650589344,
"eval_loss": 2.0696377754211426,
"eval_runtime": 35.9348,
"eval_samples_per_second": 432.033,
"eval_steps_per_second": 54.015,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 9.571195325255651e-06,
"loss": 2.1807,
"step": 1000
},
{
"epoch": 0.13,
"eval_accuracy": 0.6352025430222344,
"eval_loss": 1.9677125215530396,
"eval_runtime": 36.0518,
"eval_samples_per_second": 430.631,
"eval_steps_per_second": 53.839,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 9.356363323880726e-06,
"loss": 2.1028,
"step": 1500
},
{
"epoch": 0.19,
"eval_accuracy": 0.641511887420089,
"eval_loss": 1.9191973209381104,
"eval_runtime": 36.3057,
"eval_samples_per_second": 427.619,
"eval_steps_per_second": 53.463,
"step": 1500
},
{
"epoch": 0.26,
"learning_rate": 9.141531322505801e-06,
"loss": 2.0658,
"step": 2000
},
{
"epoch": 0.26,
"eval_accuracy": 0.6450855805600152,
"eval_loss": 1.892332673072815,
"eval_runtime": 36.0414,
"eval_samples_per_second": 430.754,
"eval_steps_per_second": 53.855,
"step": 2000
},
{
"epoch": 0.32,
"learning_rate": 8.926699321130876e-06,
"loss": 2.0426,
"step": 2500
},
{
"epoch": 0.32,
"eval_accuracy": 0.6478244526689617,
"eval_loss": 1.8699322938919067,
"eval_runtime": 36.0808,
"eval_samples_per_second": 430.284,
"eval_steps_per_second": 53.796,
"step": 2500
},
{
"epoch": 0.39,
"learning_rate": 8.71186731975595e-06,
"loss": 2.0133,
"step": 3000
},
{
"epoch": 0.39,
"eval_accuracy": 0.6489956025492812,
"eval_loss": 1.8580025434494019,
"eval_runtime": 36.4103,
"eval_samples_per_second": 426.391,
"eval_steps_per_second": 53.309,
"step": 3000
},
{
"epoch": 0.45,
"learning_rate": 8.497464982383777e-06,
"loss": 1.9978,
"step": 3500
},
{
"epoch": 0.45,
"eval_accuracy": 0.6506941121373793,
"eval_loss": 1.8410626649856567,
"eval_runtime": 36.1001,
"eval_samples_per_second": 430.054,
"eval_steps_per_second": 53.767,
"step": 3500
},
{
"epoch": 0.52,
"learning_rate": 8.282632981008852e-06,
"loss": 1.9862,
"step": 4000
},
{
"epoch": 0.52,
"eval_accuracy": 0.6524157728010056,
"eval_loss": 1.8297162055969238,
"eval_runtime": 36.1019,
"eval_samples_per_second": 430.032,
"eval_steps_per_second": 53.764,
"step": 4000
},
{
"epoch": 0.58,
"learning_rate": 8.068230643636676e-06,
"loss": 1.9745,
"step": 4500
},
{
"epoch": 0.58,
"eval_accuracy": 0.6545309828179512,
"eval_loss": 1.8154131174087524,
"eval_runtime": 36.0473,
"eval_samples_per_second": 430.684,
"eval_steps_per_second": 53.846,
"step": 4500
},
{
"epoch": 0.64,
"learning_rate": 7.853398642261751e-06,
"loss": 1.9606,
"step": 5000
},
{
"epoch": 0.64,
"eval_accuracy": 0.6556562172935413,
"eval_loss": 1.8056122064590454,
"eval_runtime": 36.2735,
"eval_samples_per_second": 427.999,
"eval_steps_per_second": 53.51,
"step": 5000
},
{
"epoch": 0.71,
"learning_rate": 7.638996304889577e-06,
"loss": 1.9486,
"step": 5500
},
{
"epoch": 0.71,
"eval_accuracy": 0.6560147022088998,
"eval_loss": 1.8032631874084473,
"eval_runtime": 36.0743,
"eval_samples_per_second": 430.362,
"eval_steps_per_second": 53.806,
"step": 5500
},
{
"epoch": 0.77,
"learning_rate": 7.424164303514653e-06,
"loss": 1.9416,
"step": 6000
},
{
"epoch": 0.77,
"eval_accuracy": 0.6580551701728226,
"eval_loss": 1.7894020080566406,
"eval_runtime": 36.1654,
"eval_samples_per_second": 429.278,
"eval_steps_per_second": 53.67,
"step": 6000
},
{
"epoch": 0.84,
"learning_rate": 7.209332302139728e-06,
"loss": 1.9279,
"step": 6500
},
{
"epoch": 0.84,
"eval_accuracy": 0.658183904138693,
"eval_loss": 1.7848395109176636,
"eval_runtime": 36.1173,
"eval_samples_per_second": 429.849,
"eval_steps_per_second": 53.742,
"step": 6500
},
{
"epoch": 0.9,
"learning_rate": 6.9945003007648025e-06,
"loss": 1.9196,
"step": 7000
},
{
"epoch": 0.9,
"eval_accuracy": 0.6592785508757635,
"eval_loss": 1.7786365747451782,
"eval_runtime": 36.2739,
"eval_samples_per_second": 427.994,
"eval_steps_per_second": 53.51,
"step": 7000
},
{
"epoch": 0.97,
"learning_rate": 6.779668299389877e-06,
"loss": 1.9168,
"step": 7500
},
{
"epoch": 0.97,
"eval_accuracy": 0.6591822827938671,
"eval_loss": 1.7761502265930176,
"eval_runtime": 36.6269,
"eval_samples_per_second": 423.868,
"eval_steps_per_second": 52.994,
"step": 7500
},
{
"epoch": 1.03,
"learning_rate": 6.564836298014953e-06,
"loss": 1.9123,
"step": 8000
},
{
"epoch": 1.03,
"eval_accuracy": 0.6596853436378691,
"eval_loss": 1.7743586301803589,
"eval_runtime": 36.1389,
"eval_samples_per_second": 429.592,
"eval_steps_per_second": 53.709,
"step": 8000
},
{
"epoch": 1.1,
"learning_rate": 6.350004296640028e-06,
"loss": 1.8942,
"step": 8500
},
{
"epoch": 1.1,
"eval_accuracy": 0.6610733402069573,
"eval_loss": 1.7624884843826294,
"eval_runtime": 36.0335,
"eval_samples_per_second": 430.849,
"eval_steps_per_second": 53.867,
"step": 8500
},
{
"epoch": 1.16,
"learning_rate": 6.135172295265103e-06,
"loss": 1.9053,
"step": 9000
},
{
"epoch": 1.16,
"eval_accuracy": 0.662326418448169,
"eval_loss": 1.7575763463974,
"eval_runtime": 36.357,
"eval_samples_per_second": 427.016,
"eval_steps_per_second": 53.387,
"step": 9000
},
{
"epoch": 1.22,
"learning_rate": 5.9203402938901785e-06,
"loss": 1.898,
"step": 9500
},
{
"epoch": 1.22,
"eval_accuracy": 0.6620202516286527,
"eval_loss": 1.758821725845337,
"eval_runtime": 36.1788,
"eval_samples_per_second": 429.118,
"eval_steps_per_second": 53.65,
"step": 9500
},
{
"epoch": 1.29,
"learning_rate": 5.705508292515254e-06,
"loss": 1.8896,
"step": 10000
},
{
"epoch": 1.29,
"eval_accuracy": 0.6625110635175566,
"eval_loss": 1.7518248558044434,
"eval_runtime": 36.1554,
"eval_samples_per_second": 429.396,
"eval_steps_per_second": 53.685,
"step": 10000
},
{
"epoch": 1.35,
"learning_rate": 5.490676291140329e-06,
"loss": 1.8796,
"step": 10500
},
{
"epoch": 1.35,
"eval_accuracy": 0.661861605044167,
"eval_loss": 1.755669116973877,
"eval_runtime": 36.1342,
"eval_samples_per_second": 429.648,
"eval_steps_per_second": 53.716,
"step": 10500
},
{
"epoch": 1.42,
"learning_rate": 5.275844289765404e-06,
"loss": 1.8838,
"step": 11000
},
{
"epoch": 1.42,
"eval_accuracy": 0.6628265417860324,
"eval_loss": 1.7511305809020996,
"eval_runtime": 36.0117,
"eval_samples_per_second": 431.11,
"eval_steps_per_second": 53.899,
"step": 11000
},
{
"epoch": 1.48,
"learning_rate": 5.061441952393229e-06,
"loss": 1.8869,
"step": 11500
},
{
"epoch": 1.48,
"eval_accuracy": 0.6639589859082099,
"eval_loss": 1.7436553239822388,
"eval_runtime": 36.239,
"eval_samples_per_second": 428.406,
"eval_steps_per_second": 53.561,
"step": 11500
},
{
"epoch": 1.55,
"learning_rate": 4.846609951018304e-06,
"loss": 1.8756,
"step": 12000
},
{
"epoch": 1.55,
"eval_accuracy": 0.6641049700653768,
"eval_loss": 1.742509126663208,
"eval_runtime": 36.1208,
"eval_samples_per_second": 429.808,
"eval_steps_per_second": 53.736,
"step": 12000
},
{
"epoch": 1.61,
"learning_rate": 4.631777949643379e-06,
"loss": 1.8775,
"step": 12500
},
{
"epoch": 1.61,
"eval_accuracy": 0.6640769398921977,
"eval_loss": 1.7409285306930542,
"eval_runtime": 36.1893,
"eval_samples_per_second": 428.994,
"eval_steps_per_second": 53.635,
"step": 12500
},
{
"epoch": 1.68,
"learning_rate": 4.416945948268455e-06,
"loss": 1.8757,
"step": 13000
},
{
"epoch": 1.68,
"eval_accuracy": 0.664925807451965,
"eval_loss": 1.7372323274612427,
"eval_runtime": 36.0287,
"eval_samples_per_second": 430.906,
"eval_steps_per_second": 53.874,
"step": 13000
},
{
"epoch": 1.74,
"learning_rate": 4.20254361089628e-06,
"loss": 1.8616,
"step": 13500
},
{
"epoch": 1.74,
"eval_accuracy": 0.6645522086560093,
"eval_loss": 1.7387374639511108,
"eval_runtime": 36.2158,
"eval_samples_per_second": 428.68,
"eval_steps_per_second": 53.595,
"step": 13500
},
{
"epoch": 1.8,
"learning_rate": 3.987711609521355e-06,
"loss": 1.8675,
"step": 14000
},
{
"epoch": 1.8,
"eval_accuracy": 0.6648440628084251,
"eval_loss": 1.7335091829299927,
"eval_runtime": 36.1693,
"eval_samples_per_second": 429.231,
"eval_steps_per_second": 53.664,
"step": 14000
},
{
"epoch": 1.87,
"learning_rate": 3.7728796081464296e-06,
"loss": 1.8725,
"step": 14500
},
{
"epoch": 1.87,
"eval_accuracy": 0.6660341443052158,
"eval_loss": 1.728769302368164,
"eval_runtime": 36.1159,
"eval_samples_per_second": 429.866,
"eval_steps_per_second": 53.744,
"step": 14500
},
{
"epoch": 1.93,
"learning_rate": 3.558047606771505e-06,
"loss": 1.8678,
"step": 15000
},
{
"epoch": 1.93,
"eval_accuracy": 0.66591998939469,
"eval_loss": 1.730508804321289,
"eval_runtime": 36.0446,
"eval_samples_per_second": 430.716,
"eval_steps_per_second": 53.85,
"step": 15000
},
{
"epoch": 2.0,
"learning_rate": 3.34321560539658e-06,
"loss": 1.8611,
"step": 15500
},
{
"epoch": 2.0,
"eval_accuracy": 0.6666247127717294,
"eval_loss": 1.7255862951278687,
"eval_runtime": 36.3664,
"eval_samples_per_second": 426.905,
"eval_steps_per_second": 53.373,
"step": 15500
},
{
"epoch": 2.06,
"learning_rate": 3.1288132680244054e-06,
"loss": 1.853,
"step": 16000
},
{
"epoch": 2.06,
"eval_accuracy": 0.6661196617167527,
"eval_loss": 1.7286032438278198,
"eval_runtime": 36.0258,
"eval_samples_per_second": 430.941,
"eval_steps_per_second": 53.878,
"step": 16000
},
{
"epoch": 2.13,
"learning_rate": 2.9139812666494803e-06,
"loss": 1.8487,
"step": 16500
},
{
"epoch": 2.13,
"eval_accuracy": 0.6658712718524595,
"eval_loss": 1.7284834384918213,
"eval_runtime": 36.2843,
"eval_samples_per_second": 427.871,
"eval_steps_per_second": 53.494,
"step": 16500
},
{
"epoch": 2.19,
"learning_rate": 2.6991492652745556e-06,
"loss": 1.8543,
"step": 17000
},
{
"epoch": 2.19,
"eval_accuracy": 0.666799617645458,
"eval_loss": 1.7229472398757935,
"eval_runtime": 36.1382,
"eval_samples_per_second": 429.601,
"eval_steps_per_second": 53.71,
"step": 17000
},
{
"epoch": 2.26,
"learning_rate": 2.484317263899631e-06,
"loss": 1.8519,
"step": 17500
},
{
"epoch": 2.26,
"eval_accuracy": 0.6669869788832046,
"eval_loss": 1.7240232229232788,
"eval_runtime": 36.2928,
"eval_samples_per_second": 427.771,
"eval_steps_per_second": 53.482,
"step": 17500
},
{
"epoch": 2.32,
"learning_rate": 2.2694852625247057e-06,
"loss": 1.851,
"step": 18000
},
{
"epoch": 2.32,
"eval_accuracy": 0.6662479933850755,
"eval_loss": 1.7275055646896362,
"eval_runtime": 36.0625,
"eval_samples_per_second": 430.502,
"eval_steps_per_second": 53.823,
"step": 18000
},
{
"epoch": 2.38,
"learning_rate": 2.054653261149781e-06,
"loss": 1.8547,
"step": 18500
},
{
"epoch": 2.38,
"eval_accuracy": 0.6672813284171724,
"eval_loss": 1.7197449207305908,
"eval_runtime": 36.3297,
"eval_samples_per_second": 427.337,
"eval_steps_per_second": 53.427,
"step": 18500
},
{
"epoch": 2.45,
"learning_rate": 1.8398212597748563e-06,
"loss": 1.8476,
"step": 19000
},
{
"epoch": 2.45,
"eval_accuracy": 0.6674510477353122,
"eval_loss": 1.7163910865783691,
"eval_runtime": 36.0727,
"eval_samples_per_second": 430.38,
"eval_steps_per_second": 53.808,
"step": 19000
},
{
"epoch": 2.51,
"learning_rate": 1.6249892583999314e-06,
"loss": 1.8444,
"step": 19500
},
{
"epoch": 2.51,
"eval_accuracy": 0.667601267022319,
"eval_loss": 1.7213865518569946,
"eval_runtime": 36.1639,
"eval_samples_per_second": 429.296,
"eval_steps_per_second": 53.672,
"step": 19500
},
{
"epoch": 2.58,
"learning_rate": 1.4101572570250067e-06,
"loss": 1.8544,
"step": 20000
},
{
"epoch": 2.58,
"eval_accuracy": 0.6668017942219797,
"eval_loss": 1.7217011451721191,
"eval_runtime": 36.3821,
"eval_samples_per_second": 426.721,
"eval_steps_per_second": 53.35,
"step": 20000
},
{
"epoch": 2.64,
"learning_rate": 1.1953252556500817e-06,
"loss": 1.8491,
"step": 20500
},
{
"epoch": 2.64,
"eval_accuracy": 0.6678062285854136,
"eval_loss": 1.717513918876648,
"eval_runtime": 36.0279,
"eval_samples_per_second": 430.916,
"eval_steps_per_second": 53.875,
"step": 20500
},
{
"epoch": 2.64,
"step": 20500,
"total_flos": 9980146705514496.0,
"train_loss": 1.9300706578696647,
"train_runtime": 3479.9405,
"train_samples_per_second": 107.005,
"train_steps_per_second": 6.688
}
],
"logging_steps": 500,
"max_steps": 23274,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 9980146705514496.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}