{ "best_metric": 0.6131083965301514, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-238", "epoch": 12.0, "global_step": 408, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.6197, "step": 34 }, { "epoch": 1.0, "eval_loss": 0.6238651871681213, "eval_matthews_correlation": 0.0, "eval_runtime": 1.5899, "eval_samples_per_second": 656.009, "eval_steps_per_second": 3.145, "step": 34 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.6078, "step": 68 }, { "epoch": 2.0, "eval_loss": 0.6178815364837646, "eval_matthews_correlation": 0.0, "eval_runtime": 1.5812, "eval_samples_per_second": 659.606, "eval_steps_per_second": 3.162, "step": 68 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.6064, "step": 102 }, { "epoch": 3.0, "eval_loss": 0.6179934144020081, "eval_matthews_correlation": 0.0, "eval_runtime": 2.4438, "eval_samples_per_second": 426.802, "eval_steps_per_second": 2.046, "step": 102 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.6073, "step": 136 }, { "epoch": 4.0, "eval_loss": 0.6175711750984192, "eval_matthews_correlation": 0.0, "eval_runtime": 1.7323, "eval_samples_per_second": 602.096, "eval_steps_per_second": 2.886, "step": 136 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.6069, "step": 170 }, { "epoch": 5.0, "eval_loss": 0.6172661781311035, "eval_matthews_correlation": 0.0, "eval_runtime": 1.7156, "eval_samples_per_second": 607.942, "eval_steps_per_second": 2.914, "step": 170 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.6043, "step": 204 }, { "epoch": 6.0, "eval_loss": 0.6166184544563293, "eval_matthews_correlation": 0.0, "eval_runtime": 1.9408, "eval_samples_per_second": 537.405, "eval_steps_per_second": 2.576, "step": 204 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.6004, "step": 238 }, { "epoch": 7.0, "eval_loss": 0.6131083965301514, "eval_matthews_correlation": 0.0, "eval_runtime": 1.7507, "eval_samples_per_second": 595.751, "eval_steps_per_second": 2.856, "step": 238 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.5842, "step": 272 }, { "epoch": 8.0, "eval_loss": 0.6240708231925964, "eval_matthews_correlation": 0.0951039122870703, "eval_runtime": 1.7156, "eval_samples_per_second": 607.967, "eval_steps_per_second": 2.915, "step": 272 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.5192, "step": 306 }, { "epoch": 9.0, "eval_loss": 0.6361746191978455, "eval_matthews_correlation": 0.059760920069176514, "eval_runtime": 1.9833, "eval_samples_per_second": 525.879, "eval_steps_per_second": 2.521, "step": 306 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.4884, "step": 340 }, { "epoch": 10.0, "eval_loss": 0.7009902596473694, "eval_matthews_correlation": 0.08008155523655092, "eval_runtime": 1.7208, "eval_samples_per_second": 606.103, "eval_steps_per_second": 2.906, "step": 340 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.4559, "step": 374 }, { "epoch": 11.0, "eval_loss": 0.6731011867523193, "eval_matthews_correlation": 0.09051190856095573, "eval_runtime": 1.7848, "eval_samples_per_second": 584.391, "eval_steps_per_second": 2.801, "step": 374 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.4367, "step": 408 }, { "epoch": 12.0, "eval_loss": 0.6893478035926819, "eval_matthews_correlation": 0.09007205990892461, "eval_runtime": 1.959, "eval_samples_per_second": 532.427, "eval_steps_per_second": 2.552, "step": 408 }, { "epoch": 12.0, "step": 408, "total_flos": 3258721140473856.0, "train_loss": 0.5614397408915501, "train_runtime": 608.6002, "train_samples_per_second": 702.514, "train_steps_per_second": 2.793 } ], "max_steps": 1700, "num_train_epochs": 50, "total_flos": 3258721140473856.0, "trial_name": null, "trial_params": null }