{ "best_metric": 1.7163910865783691, "best_model_checkpoint": "finetuning/output/electra-base-finetuned_xe_ey_fae/checkpoint-19000", "epoch": 2.642433616911575, "eval_steps": 500, "global_step": 20500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 9.786027326630576e-06, "loss": 2.5359, "step": 500 }, { "epoch": 0.06, "eval_accuracy": 0.6227738650589344, "eval_loss": 2.0696377754211426, "eval_runtime": 35.9348, "eval_samples_per_second": 432.033, "eval_steps_per_second": 54.015, "step": 500 }, { "epoch": 0.13, "learning_rate": 9.571195325255651e-06, "loss": 2.1807, "step": 1000 }, { "epoch": 0.13, "eval_accuracy": 0.6352025430222344, "eval_loss": 1.9677125215530396, "eval_runtime": 36.0518, "eval_samples_per_second": 430.631, "eval_steps_per_second": 53.839, "step": 1000 }, { "epoch": 0.19, "learning_rate": 9.356363323880726e-06, "loss": 2.1028, "step": 1500 }, { "epoch": 0.19, "eval_accuracy": 0.641511887420089, "eval_loss": 1.9191973209381104, "eval_runtime": 36.3057, "eval_samples_per_second": 427.619, "eval_steps_per_second": 53.463, "step": 1500 }, { "epoch": 0.26, "learning_rate": 9.141531322505801e-06, "loss": 2.0658, "step": 2000 }, { "epoch": 0.26, "eval_accuracy": 0.6450855805600152, "eval_loss": 1.892332673072815, "eval_runtime": 36.0414, "eval_samples_per_second": 430.754, "eval_steps_per_second": 53.855, "step": 2000 }, { "epoch": 0.32, "learning_rate": 8.926699321130876e-06, "loss": 2.0426, "step": 2500 }, { "epoch": 0.32, "eval_accuracy": 0.6478244526689617, "eval_loss": 1.8699322938919067, "eval_runtime": 36.0808, "eval_samples_per_second": 430.284, "eval_steps_per_second": 53.796, "step": 2500 }, { "epoch": 0.39, "learning_rate": 8.71186731975595e-06, "loss": 2.0133, "step": 3000 }, { "epoch": 0.39, "eval_accuracy": 0.6489956025492812, "eval_loss": 1.8580025434494019, "eval_runtime": 36.4103, "eval_samples_per_second": 426.391, "eval_steps_per_second": 53.309, "step": 3000 }, { "epoch": 0.45, "learning_rate": 8.497464982383777e-06, "loss": 1.9978, "step": 3500 }, { "epoch": 0.45, "eval_accuracy": 0.6506941121373793, "eval_loss": 1.8410626649856567, "eval_runtime": 36.1001, "eval_samples_per_second": 430.054, "eval_steps_per_second": 53.767, "step": 3500 }, { "epoch": 0.52, "learning_rate": 8.282632981008852e-06, "loss": 1.9862, "step": 4000 }, { "epoch": 0.52, "eval_accuracy": 0.6524157728010056, "eval_loss": 1.8297162055969238, "eval_runtime": 36.1019, "eval_samples_per_second": 430.032, "eval_steps_per_second": 53.764, "step": 4000 }, { "epoch": 0.58, "learning_rate": 8.068230643636676e-06, "loss": 1.9745, "step": 4500 }, { "epoch": 0.58, "eval_accuracy": 0.6545309828179512, "eval_loss": 1.8154131174087524, "eval_runtime": 36.0473, "eval_samples_per_second": 430.684, "eval_steps_per_second": 53.846, "step": 4500 }, { "epoch": 0.64, "learning_rate": 7.853398642261751e-06, "loss": 1.9606, "step": 5000 }, { "epoch": 0.64, "eval_accuracy": 0.6556562172935413, "eval_loss": 1.8056122064590454, "eval_runtime": 36.2735, "eval_samples_per_second": 427.999, "eval_steps_per_second": 53.51, "step": 5000 }, { "epoch": 0.71, "learning_rate": 7.638996304889577e-06, "loss": 1.9486, "step": 5500 }, { "epoch": 0.71, "eval_accuracy": 0.6560147022088998, "eval_loss": 1.8032631874084473, "eval_runtime": 36.0743, "eval_samples_per_second": 430.362, "eval_steps_per_second": 53.806, "step": 5500 }, { "epoch": 0.77, "learning_rate": 7.424164303514653e-06, "loss": 1.9416, "step": 6000 }, { "epoch": 0.77, "eval_accuracy": 0.6580551701728226, "eval_loss": 1.7894020080566406, "eval_runtime": 36.1654, "eval_samples_per_second": 429.278, "eval_steps_per_second": 53.67, "step": 6000 }, { "epoch": 0.84, "learning_rate": 7.209332302139728e-06, "loss": 1.9279, "step": 6500 }, { "epoch": 0.84, "eval_accuracy": 0.658183904138693, "eval_loss": 1.7848395109176636, "eval_runtime": 36.1173, "eval_samples_per_second": 429.849, "eval_steps_per_second": 53.742, "step": 6500 }, { "epoch": 0.9, "learning_rate": 6.9945003007648025e-06, "loss": 1.9196, "step": 7000 }, { "epoch": 0.9, "eval_accuracy": 0.6592785508757635, "eval_loss": 1.7786365747451782, "eval_runtime": 36.2739, "eval_samples_per_second": 427.994, "eval_steps_per_second": 53.51, "step": 7000 }, { "epoch": 0.97, "learning_rate": 6.779668299389877e-06, "loss": 1.9168, "step": 7500 }, { "epoch": 0.97, "eval_accuracy": 0.6591822827938671, "eval_loss": 1.7761502265930176, "eval_runtime": 36.6269, "eval_samples_per_second": 423.868, "eval_steps_per_second": 52.994, "step": 7500 }, { "epoch": 1.03, "learning_rate": 6.564836298014953e-06, "loss": 1.9123, "step": 8000 }, { "epoch": 1.03, "eval_accuracy": 0.6596853436378691, "eval_loss": 1.7743586301803589, "eval_runtime": 36.1389, "eval_samples_per_second": 429.592, "eval_steps_per_second": 53.709, "step": 8000 }, { "epoch": 1.1, "learning_rate": 6.350004296640028e-06, "loss": 1.8942, "step": 8500 }, { "epoch": 1.1, "eval_accuracy": 0.6610733402069573, "eval_loss": 1.7624884843826294, "eval_runtime": 36.0335, "eval_samples_per_second": 430.849, "eval_steps_per_second": 53.867, "step": 8500 }, { "epoch": 1.16, "learning_rate": 6.135172295265103e-06, "loss": 1.9053, "step": 9000 }, { "epoch": 1.16, "eval_accuracy": 0.662326418448169, "eval_loss": 1.7575763463974, "eval_runtime": 36.357, "eval_samples_per_second": 427.016, "eval_steps_per_second": 53.387, "step": 9000 }, { "epoch": 1.22, "learning_rate": 5.9203402938901785e-06, "loss": 1.898, "step": 9500 }, { "epoch": 1.22, "eval_accuracy": 0.6620202516286527, "eval_loss": 1.758821725845337, "eval_runtime": 36.1788, "eval_samples_per_second": 429.118, "eval_steps_per_second": 53.65, "step": 9500 }, { "epoch": 1.29, "learning_rate": 5.705508292515254e-06, "loss": 1.8896, "step": 10000 }, { "epoch": 1.29, "eval_accuracy": 0.6625110635175566, "eval_loss": 1.7518248558044434, "eval_runtime": 36.1554, "eval_samples_per_second": 429.396, "eval_steps_per_second": 53.685, "step": 10000 }, { "epoch": 1.35, "learning_rate": 5.490676291140329e-06, "loss": 1.8796, "step": 10500 }, { "epoch": 1.35, "eval_accuracy": 0.661861605044167, "eval_loss": 1.755669116973877, "eval_runtime": 36.1342, "eval_samples_per_second": 429.648, "eval_steps_per_second": 53.716, "step": 10500 }, { "epoch": 1.42, "learning_rate": 5.275844289765404e-06, "loss": 1.8838, "step": 11000 }, { "epoch": 1.42, "eval_accuracy": 0.6628265417860324, "eval_loss": 1.7511305809020996, "eval_runtime": 36.0117, "eval_samples_per_second": 431.11, "eval_steps_per_second": 53.899, "step": 11000 }, { "epoch": 1.48, "learning_rate": 5.061441952393229e-06, "loss": 1.8869, "step": 11500 }, { "epoch": 1.48, "eval_accuracy": 0.6639589859082099, "eval_loss": 1.7436553239822388, "eval_runtime": 36.239, "eval_samples_per_second": 428.406, "eval_steps_per_second": 53.561, "step": 11500 }, { "epoch": 1.55, "learning_rate": 4.846609951018304e-06, "loss": 1.8756, "step": 12000 }, { "epoch": 1.55, "eval_accuracy": 0.6641049700653768, "eval_loss": 1.742509126663208, "eval_runtime": 36.1208, "eval_samples_per_second": 429.808, "eval_steps_per_second": 53.736, "step": 12000 }, { "epoch": 1.61, "learning_rate": 4.631777949643379e-06, "loss": 1.8775, "step": 12500 }, { "epoch": 1.61, "eval_accuracy": 0.6640769398921977, "eval_loss": 1.7409285306930542, "eval_runtime": 36.1893, "eval_samples_per_second": 428.994, "eval_steps_per_second": 53.635, "step": 12500 }, { "epoch": 1.68, "learning_rate": 4.416945948268455e-06, "loss": 1.8757, "step": 13000 }, { "epoch": 1.68, "eval_accuracy": 0.664925807451965, "eval_loss": 1.7372323274612427, "eval_runtime": 36.0287, "eval_samples_per_second": 430.906, "eval_steps_per_second": 53.874, "step": 13000 }, { "epoch": 1.74, "learning_rate": 4.20254361089628e-06, "loss": 1.8616, "step": 13500 }, { "epoch": 1.74, "eval_accuracy": 0.6645522086560093, "eval_loss": 1.7387374639511108, "eval_runtime": 36.2158, "eval_samples_per_second": 428.68, "eval_steps_per_second": 53.595, "step": 13500 }, { "epoch": 1.8, "learning_rate": 3.987711609521355e-06, "loss": 1.8675, "step": 14000 }, { "epoch": 1.8, "eval_accuracy": 0.6648440628084251, "eval_loss": 1.7335091829299927, "eval_runtime": 36.1693, "eval_samples_per_second": 429.231, "eval_steps_per_second": 53.664, "step": 14000 }, { "epoch": 1.87, "learning_rate": 3.7728796081464296e-06, "loss": 1.8725, "step": 14500 }, { "epoch": 1.87, "eval_accuracy": 0.6660341443052158, "eval_loss": 1.728769302368164, "eval_runtime": 36.1159, "eval_samples_per_second": 429.866, "eval_steps_per_second": 53.744, "step": 14500 }, { "epoch": 1.93, "learning_rate": 3.558047606771505e-06, "loss": 1.8678, "step": 15000 }, { "epoch": 1.93, "eval_accuracy": 0.66591998939469, "eval_loss": 1.730508804321289, "eval_runtime": 36.0446, "eval_samples_per_second": 430.716, "eval_steps_per_second": 53.85, "step": 15000 }, { "epoch": 2.0, "learning_rate": 3.34321560539658e-06, "loss": 1.8611, "step": 15500 }, { "epoch": 2.0, "eval_accuracy": 0.6666247127717294, "eval_loss": 1.7255862951278687, "eval_runtime": 36.3664, "eval_samples_per_second": 426.905, "eval_steps_per_second": 53.373, "step": 15500 }, { "epoch": 2.06, "learning_rate": 3.1288132680244054e-06, "loss": 1.853, "step": 16000 }, { "epoch": 2.06, "eval_accuracy": 0.6661196617167527, "eval_loss": 1.7286032438278198, "eval_runtime": 36.0258, "eval_samples_per_second": 430.941, "eval_steps_per_second": 53.878, "step": 16000 }, { "epoch": 2.13, "learning_rate": 2.9139812666494803e-06, "loss": 1.8487, "step": 16500 }, { "epoch": 2.13, "eval_accuracy": 0.6658712718524595, "eval_loss": 1.7284834384918213, "eval_runtime": 36.2843, "eval_samples_per_second": 427.871, "eval_steps_per_second": 53.494, "step": 16500 }, { "epoch": 2.19, "learning_rate": 2.6991492652745556e-06, "loss": 1.8543, "step": 17000 }, { "epoch": 2.19, "eval_accuracy": 0.666799617645458, "eval_loss": 1.7229472398757935, "eval_runtime": 36.1382, "eval_samples_per_second": 429.601, "eval_steps_per_second": 53.71, "step": 17000 }, { "epoch": 2.26, "learning_rate": 2.484317263899631e-06, "loss": 1.8519, "step": 17500 }, { "epoch": 2.26, "eval_accuracy": 0.6669869788832046, "eval_loss": 1.7240232229232788, "eval_runtime": 36.2928, "eval_samples_per_second": 427.771, "eval_steps_per_second": 53.482, "step": 17500 }, { "epoch": 2.32, "learning_rate": 2.2694852625247057e-06, "loss": 1.851, "step": 18000 }, { "epoch": 2.32, "eval_accuracy": 0.6662479933850755, "eval_loss": 1.7275055646896362, "eval_runtime": 36.0625, "eval_samples_per_second": 430.502, "eval_steps_per_second": 53.823, "step": 18000 }, { "epoch": 2.38, "learning_rate": 2.054653261149781e-06, "loss": 1.8547, "step": 18500 }, { "epoch": 2.38, "eval_accuracy": 0.6672813284171724, "eval_loss": 1.7197449207305908, "eval_runtime": 36.3297, "eval_samples_per_second": 427.337, "eval_steps_per_second": 53.427, "step": 18500 }, { "epoch": 2.45, "learning_rate": 1.8398212597748563e-06, "loss": 1.8476, "step": 19000 }, { "epoch": 2.45, "eval_accuracy": 0.6674510477353122, "eval_loss": 1.7163910865783691, "eval_runtime": 36.0727, "eval_samples_per_second": 430.38, "eval_steps_per_second": 53.808, "step": 19000 }, { "epoch": 2.51, "learning_rate": 1.6249892583999314e-06, "loss": 1.8444, "step": 19500 }, { "epoch": 2.51, "eval_accuracy": 0.667601267022319, "eval_loss": 1.7213865518569946, "eval_runtime": 36.1639, "eval_samples_per_second": 429.296, "eval_steps_per_second": 53.672, "step": 19500 }, { "epoch": 2.58, "learning_rate": 1.4101572570250067e-06, "loss": 1.8544, "step": 20000 }, { "epoch": 2.58, "eval_accuracy": 0.6668017942219797, "eval_loss": 1.7217011451721191, "eval_runtime": 36.3821, "eval_samples_per_second": 426.721, "eval_steps_per_second": 53.35, "step": 20000 }, { "epoch": 2.64, "learning_rate": 1.1953252556500817e-06, "loss": 1.8491, "step": 20500 }, { "epoch": 2.64, "eval_accuracy": 0.6678062285854136, "eval_loss": 1.717513918876648, "eval_runtime": 36.0279, "eval_samples_per_second": 430.916, "eval_steps_per_second": 53.875, "step": 20500 }, { "epoch": 2.64, "step": 20500, "total_flos": 9980146705514496.0, "train_loss": 1.9300706578696647, "train_runtime": 3479.9405, "train_samples_per_second": 107.005, "train_steps_per_second": 6.688 } ], "logging_steps": 500, "max_steps": 23274, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 9980146705514496.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }