|
{ |
|
"best_metric": 1.7163910865783691, |
|
"best_model_checkpoint": "finetuning/output/electra-base-finetuned_xe_ey_fae/checkpoint-19000", |
|
"epoch": 2.642433616911575, |
|
"eval_steps": 500, |
|
"global_step": 20500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.786027326630576e-06, |
|
"loss": 2.5359, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.6227738650589344, |
|
"eval_loss": 2.0696377754211426, |
|
"eval_runtime": 35.9348, |
|
"eval_samples_per_second": 432.033, |
|
"eval_steps_per_second": 54.015, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.571195325255651e-06, |
|
"loss": 2.1807, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.6352025430222344, |
|
"eval_loss": 1.9677125215530396, |
|
"eval_runtime": 36.0518, |
|
"eval_samples_per_second": 430.631, |
|
"eval_steps_per_second": 53.839, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.356363323880726e-06, |
|
"loss": 2.1028, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.641511887420089, |
|
"eval_loss": 1.9191973209381104, |
|
"eval_runtime": 36.3057, |
|
"eval_samples_per_second": 427.619, |
|
"eval_steps_per_second": 53.463, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.141531322505801e-06, |
|
"loss": 2.0658, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.6450855805600152, |
|
"eval_loss": 1.892332673072815, |
|
"eval_runtime": 36.0414, |
|
"eval_samples_per_second": 430.754, |
|
"eval_steps_per_second": 53.855, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.926699321130876e-06, |
|
"loss": 2.0426, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.6478244526689617, |
|
"eval_loss": 1.8699322938919067, |
|
"eval_runtime": 36.0808, |
|
"eval_samples_per_second": 430.284, |
|
"eval_steps_per_second": 53.796, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.71186731975595e-06, |
|
"loss": 2.0133, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.6489956025492812, |
|
"eval_loss": 1.8580025434494019, |
|
"eval_runtime": 36.4103, |
|
"eval_samples_per_second": 426.391, |
|
"eval_steps_per_second": 53.309, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.497464982383777e-06, |
|
"loss": 1.9978, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.6506941121373793, |
|
"eval_loss": 1.8410626649856567, |
|
"eval_runtime": 36.1001, |
|
"eval_samples_per_second": 430.054, |
|
"eval_steps_per_second": 53.767, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.282632981008852e-06, |
|
"loss": 1.9862, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.6524157728010056, |
|
"eval_loss": 1.8297162055969238, |
|
"eval_runtime": 36.1019, |
|
"eval_samples_per_second": 430.032, |
|
"eval_steps_per_second": 53.764, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.068230643636676e-06, |
|
"loss": 1.9745, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.6545309828179512, |
|
"eval_loss": 1.8154131174087524, |
|
"eval_runtime": 36.0473, |
|
"eval_samples_per_second": 430.684, |
|
"eval_steps_per_second": 53.846, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.853398642261751e-06, |
|
"loss": 1.9606, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.6556562172935413, |
|
"eval_loss": 1.8056122064590454, |
|
"eval_runtime": 36.2735, |
|
"eval_samples_per_second": 427.999, |
|
"eval_steps_per_second": 53.51, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.638996304889577e-06, |
|
"loss": 1.9486, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.6560147022088998, |
|
"eval_loss": 1.8032631874084473, |
|
"eval_runtime": 36.0743, |
|
"eval_samples_per_second": 430.362, |
|
"eval_steps_per_second": 53.806, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.424164303514653e-06, |
|
"loss": 1.9416, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.6580551701728226, |
|
"eval_loss": 1.7894020080566406, |
|
"eval_runtime": 36.1654, |
|
"eval_samples_per_second": 429.278, |
|
"eval_steps_per_second": 53.67, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.209332302139728e-06, |
|
"loss": 1.9279, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.658183904138693, |
|
"eval_loss": 1.7848395109176636, |
|
"eval_runtime": 36.1173, |
|
"eval_samples_per_second": 429.849, |
|
"eval_steps_per_second": 53.742, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.9945003007648025e-06, |
|
"loss": 1.9196, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.6592785508757635, |
|
"eval_loss": 1.7786365747451782, |
|
"eval_runtime": 36.2739, |
|
"eval_samples_per_second": 427.994, |
|
"eval_steps_per_second": 53.51, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.779668299389877e-06, |
|
"loss": 1.9168, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.6591822827938671, |
|
"eval_loss": 1.7761502265930176, |
|
"eval_runtime": 36.6269, |
|
"eval_samples_per_second": 423.868, |
|
"eval_steps_per_second": 52.994, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.564836298014953e-06, |
|
"loss": 1.9123, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_accuracy": 0.6596853436378691, |
|
"eval_loss": 1.7743586301803589, |
|
"eval_runtime": 36.1389, |
|
"eval_samples_per_second": 429.592, |
|
"eval_steps_per_second": 53.709, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.350004296640028e-06, |
|
"loss": 1.8942, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.6610733402069573, |
|
"eval_loss": 1.7624884843826294, |
|
"eval_runtime": 36.0335, |
|
"eval_samples_per_second": 430.849, |
|
"eval_steps_per_second": 53.867, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.135172295265103e-06, |
|
"loss": 1.9053, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_accuracy": 0.662326418448169, |
|
"eval_loss": 1.7575763463974, |
|
"eval_runtime": 36.357, |
|
"eval_samples_per_second": 427.016, |
|
"eval_steps_per_second": 53.387, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.9203402938901785e-06, |
|
"loss": 1.898, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.6620202516286527, |
|
"eval_loss": 1.758821725845337, |
|
"eval_runtime": 36.1788, |
|
"eval_samples_per_second": 429.118, |
|
"eval_steps_per_second": 53.65, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.705508292515254e-06, |
|
"loss": 1.8896, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.6625110635175566, |
|
"eval_loss": 1.7518248558044434, |
|
"eval_runtime": 36.1554, |
|
"eval_samples_per_second": 429.396, |
|
"eval_steps_per_second": 53.685, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.490676291140329e-06, |
|
"loss": 1.8796, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.661861605044167, |
|
"eval_loss": 1.755669116973877, |
|
"eval_runtime": 36.1342, |
|
"eval_samples_per_second": 429.648, |
|
"eval_steps_per_second": 53.716, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.275844289765404e-06, |
|
"loss": 1.8838, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_accuracy": 0.6628265417860324, |
|
"eval_loss": 1.7511305809020996, |
|
"eval_runtime": 36.0117, |
|
"eval_samples_per_second": 431.11, |
|
"eval_steps_per_second": 53.899, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.061441952393229e-06, |
|
"loss": 1.8869, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.6639589859082099, |
|
"eval_loss": 1.7436553239822388, |
|
"eval_runtime": 36.239, |
|
"eval_samples_per_second": 428.406, |
|
"eval_steps_per_second": 53.561, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.846609951018304e-06, |
|
"loss": 1.8756, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.6641049700653768, |
|
"eval_loss": 1.742509126663208, |
|
"eval_runtime": 36.1208, |
|
"eval_samples_per_second": 429.808, |
|
"eval_steps_per_second": 53.736, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.631777949643379e-06, |
|
"loss": 1.8775, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.6640769398921977, |
|
"eval_loss": 1.7409285306930542, |
|
"eval_runtime": 36.1893, |
|
"eval_samples_per_second": 428.994, |
|
"eval_steps_per_second": 53.635, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.416945948268455e-06, |
|
"loss": 1.8757, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.664925807451965, |
|
"eval_loss": 1.7372323274612427, |
|
"eval_runtime": 36.0287, |
|
"eval_samples_per_second": 430.906, |
|
"eval_steps_per_second": 53.874, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.20254361089628e-06, |
|
"loss": 1.8616, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.6645522086560093, |
|
"eval_loss": 1.7387374639511108, |
|
"eval_runtime": 36.2158, |
|
"eval_samples_per_second": 428.68, |
|
"eval_steps_per_second": 53.595, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.987711609521355e-06, |
|
"loss": 1.8675, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.6648440628084251, |
|
"eval_loss": 1.7335091829299927, |
|
"eval_runtime": 36.1693, |
|
"eval_samples_per_second": 429.231, |
|
"eval_steps_per_second": 53.664, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.7728796081464296e-06, |
|
"loss": 1.8725, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.6660341443052158, |
|
"eval_loss": 1.728769302368164, |
|
"eval_runtime": 36.1159, |
|
"eval_samples_per_second": 429.866, |
|
"eval_steps_per_second": 53.744, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.558047606771505e-06, |
|
"loss": 1.8678, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_accuracy": 0.66591998939469, |
|
"eval_loss": 1.730508804321289, |
|
"eval_runtime": 36.0446, |
|
"eval_samples_per_second": 430.716, |
|
"eval_steps_per_second": 53.85, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.34321560539658e-06, |
|
"loss": 1.8611, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6666247127717294, |
|
"eval_loss": 1.7255862951278687, |
|
"eval_runtime": 36.3664, |
|
"eval_samples_per_second": 426.905, |
|
"eval_steps_per_second": 53.373, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.1288132680244054e-06, |
|
"loss": 1.853, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_accuracy": 0.6661196617167527, |
|
"eval_loss": 1.7286032438278198, |
|
"eval_runtime": 36.0258, |
|
"eval_samples_per_second": 430.941, |
|
"eval_steps_per_second": 53.878, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.9139812666494803e-06, |
|
"loss": 1.8487, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.6658712718524595, |
|
"eval_loss": 1.7284834384918213, |
|
"eval_runtime": 36.2843, |
|
"eval_samples_per_second": 427.871, |
|
"eval_steps_per_second": 53.494, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.6991492652745556e-06, |
|
"loss": 1.8543, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.666799617645458, |
|
"eval_loss": 1.7229472398757935, |
|
"eval_runtime": 36.1382, |
|
"eval_samples_per_second": 429.601, |
|
"eval_steps_per_second": 53.71, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.484317263899631e-06, |
|
"loss": 1.8519, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.6669869788832046, |
|
"eval_loss": 1.7240232229232788, |
|
"eval_runtime": 36.2928, |
|
"eval_samples_per_second": 427.771, |
|
"eval_steps_per_second": 53.482, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2694852625247057e-06, |
|
"loss": 1.851, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.6662479933850755, |
|
"eval_loss": 1.7275055646896362, |
|
"eval_runtime": 36.0625, |
|
"eval_samples_per_second": 430.502, |
|
"eval_steps_per_second": 53.823, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.054653261149781e-06, |
|
"loss": 1.8547, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.6672813284171724, |
|
"eval_loss": 1.7197449207305908, |
|
"eval_runtime": 36.3297, |
|
"eval_samples_per_second": 427.337, |
|
"eval_steps_per_second": 53.427, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8398212597748563e-06, |
|
"loss": 1.8476, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.6674510477353122, |
|
"eval_loss": 1.7163910865783691, |
|
"eval_runtime": 36.0727, |
|
"eval_samples_per_second": 430.38, |
|
"eval_steps_per_second": 53.808, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6249892583999314e-06, |
|
"loss": 1.8444, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.667601267022319, |
|
"eval_loss": 1.7213865518569946, |
|
"eval_runtime": 36.1639, |
|
"eval_samples_per_second": 429.296, |
|
"eval_steps_per_second": 53.672, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4101572570250067e-06, |
|
"loss": 1.8544, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.6668017942219797, |
|
"eval_loss": 1.7217011451721191, |
|
"eval_runtime": 36.3821, |
|
"eval_samples_per_second": 426.721, |
|
"eval_steps_per_second": 53.35, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.1953252556500817e-06, |
|
"loss": 1.8491, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_accuracy": 0.6678062285854136, |
|
"eval_loss": 1.717513918876648, |
|
"eval_runtime": 36.0279, |
|
"eval_samples_per_second": 430.916, |
|
"eval_steps_per_second": 53.875, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"step": 20500, |
|
"total_flos": 9980146705514496.0, |
|
"train_loss": 1.9300706578696647, |
|
"train_runtime": 3479.9405, |
|
"train_samples_per_second": 107.005, |
|
"train_steps_per_second": 6.688 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 23274, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 9980146705514496.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|