{ "best_metric": 0.6131083965301514, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-238", "epoch": 12.0, "global_step": 408, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.6197, "step": 34 }, { "epoch": 1.0, "eval_loss": 0.6238651871681213, "eval_matthews_correlation": 0.0, "eval_runtime": 1.0726, "eval_samples_per_second": 972.413, "eval_steps_per_second": 4.662, "step": 34 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6078, "step": 68 }, { "epoch": 2.0, "eval_loss": 0.6178815364837646, "eval_matthews_correlation": 0.0, "eval_runtime": 1.0582, "eval_samples_per_second": 985.593, "eval_steps_per_second": 4.725, "step": 68 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6064, "step": 102 }, { "epoch": 3.0, "eval_loss": 0.6179934144020081, "eval_matthews_correlation": 0.0, "eval_runtime": 1.3387, "eval_samples_per_second": 779.113, "eval_steps_per_second": 3.735, "step": 102 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.6073, "step": 136 }, { "epoch": 4.0, "eval_loss": 0.6175711750984192, "eval_matthews_correlation": 0.0, "eval_runtime": 1.1189, "eval_samples_per_second": 932.188, "eval_steps_per_second": 4.469, "step": 136 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6069, "step": 170 }, { "epoch": 5.0, "eval_loss": 0.6172661781311035, "eval_matthews_correlation": 0.0, "eval_runtime": 1.1205, "eval_samples_per_second": 930.859, "eval_steps_per_second": 4.462, "step": 170 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.6043, "step": 204 }, { "epoch": 6.0, "eval_loss": 0.6166184544563293, "eval_matthews_correlation": 0.0, "eval_runtime": 1.2344, "eval_samples_per_second": 844.955, "eval_steps_per_second": 4.051, "step": 204 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.6004, "step": 238 }, { "epoch": 7.0, "eval_loss": 0.6131083965301514, "eval_matthews_correlation": 0.0, "eval_runtime": 1.1421, "eval_samples_per_second": 913.231, "eval_steps_per_second": 4.378, "step": 238 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.5842, "step": 272 }, { "epoch": 8.0, "eval_loss": 0.6240708231925964, "eval_matthews_correlation": 0.0951039122870703, "eval_runtime": 1.1486, "eval_samples_per_second": 908.065, "eval_steps_per_second": 4.353, "step": 272 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.5192, "step": 306 }, { "epoch": 9.0, "eval_loss": 0.6361746191978455, "eval_matthews_correlation": 0.059760920069176514, "eval_runtime": 1.2007, "eval_samples_per_second": 868.661, "eval_steps_per_second": 4.164, "step": 306 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.4884, "step": 340 }, { "epoch": 10.0, "eval_loss": 0.7009902596473694, "eval_matthews_correlation": 0.08008155523655092, "eval_runtime": 1.1475, "eval_samples_per_second": 908.97, "eval_steps_per_second": 4.357, "step": 340 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.4559, "step": 374 }, { "epoch": 11.0, "eval_loss": 0.6731011867523193, "eval_matthews_correlation": 0.09051190856095573, "eval_runtime": 1.1134, "eval_samples_per_second": 936.763, "eval_steps_per_second": 4.491, "step": 374 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.4367, "step": 408 }, { "epoch": 12.0, "eval_loss": 0.6893478035926819, "eval_matthews_correlation": 0.09007205990892461, "eval_runtime": 1.2098, "eval_samples_per_second": 862.13, "eval_steps_per_second": 4.133, "step": 408 }, { "epoch": 12.0, "step": 408, "total_flos": 3258721140473856.0, "train_loss": 0.5614397408915501, "train_runtime": 434.3841, "train_samples_per_second": 984.267, "train_steps_per_second": 3.914 } ], "max_steps": 1700, "num_train_epochs": 50, "total_flos": 3258721140473856.0, "trial_name": null, "trial_params": null }