{ "best_metric": 1.2869539260864258, "best_model_checkpoint": "./checkpoints/pegasus-xsum/checkpoint-11412", "epoch": 12.0, "global_step": 11412, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27, "learning_rate": 5e-06, "loss": 8.3438, "step": 256 }, { "epoch": 0.54, "learning_rate": 1e-05, "loss": 7.5374, "step": 512 }, { "epoch": 0.81, "learning_rate": 9.93178426774675e-06, "loss": 7.0835, "step": 768 }, { "epoch": 1.0, "eval_loss": 6.375093936920166, "eval_runtime": 8.2682, "eval_samples_per_second": 60.473, "eval_steps_per_second": 7.62, "step": 951 }, { "epoch": 1.08, "learning_rate": 9.863568535493498e-06, "loss": 6.7408, "step": 1024 }, { "epoch": 1.35, "learning_rate": 9.795352803240248e-06, "loss": 6.0547, "step": 1280 }, { "epoch": 1.62, "learning_rate": 9.727137070986997e-06, "loss": 4.6247, "step": 1536 }, { "epoch": 1.88, "learning_rate": 9.658921338733747e-06, "loss": 2.6363, "step": 1792 }, { "epoch": 2.0, "eval_loss": 1.4205403327941895, "eval_runtime": 8.2748, "eval_samples_per_second": 60.424, "eval_steps_per_second": 7.613, "step": 1902 }, { "epoch": 2.15, "learning_rate": 9.590705606480494e-06, "loss": 1.6679, "step": 2048 }, { "epoch": 2.42, "learning_rate": 9.522489874227244e-06, "loss": 1.5104, "step": 2304 }, { "epoch": 2.69, "learning_rate": 9.454274141973993e-06, "loss": 1.5184, "step": 2560 }, { "epoch": 2.96, "learning_rate": 9.386058409720743e-06, "loss": 1.4953, "step": 2816 }, { "epoch": 3.0, "eval_loss": 1.3500312566757202, "eval_runtime": 8.2581, "eval_samples_per_second": 60.546, "eval_steps_per_second": 7.629, "step": 2853 }, { "epoch": 3.23, "learning_rate": 9.31784267746749e-06, "loss": 1.4483, "step": 3072 }, { "epoch": 3.5, "learning_rate": 9.24962694521424e-06, "loss": 1.3758, "step": 3328 }, { "epoch": 3.77, "learning_rate": 9.18141121296099e-06, "loss": 1.3732, "step": 3584 }, { "epoch": 4.0, "eval_loss": 1.3236401081085205, "eval_runtime": 8.2531, "eval_samples_per_second": 60.584, "eval_steps_per_second": 7.634, "step": 3804 }, { "epoch": 4.04, "learning_rate": 9.11319548070774e-06, "loss": 1.4249, "step": 3840 }, { "epoch": 4.31, "learning_rate": 9.044979748454487e-06, "loss": 1.4052, "step": 4096 }, { "epoch": 4.58, "learning_rate": 8.976764016201236e-06, "loss": 1.3323, "step": 4352 }, { "epoch": 4.85, "learning_rate": 8.908548283947986e-06, "loss": 1.3573, "step": 4608 }, { "epoch": 5.0, "eval_loss": 1.311529517173767, "eval_runtime": 8.2933, "eval_samples_per_second": 60.29, "eval_steps_per_second": 7.597, "step": 4755 }, { "epoch": 5.11, "learning_rate": 8.840332551694735e-06, "loss": 1.3169, "step": 4864 }, { "epoch": 5.38, "learning_rate": 8.772116819441483e-06, "loss": 1.3142, "step": 5120 }, { "epoch": 5.65, "learning_rate": 8.703901087188233e-06, "loss": 1.3562, "step": 5376 }, { "epoch": 5.92, "learning_rate": 8.635685354934982e-06, "loss": 1.3171, "step": 5632 }, { "epoch": 6.0, "eval_loss": 1.3023052215576172, "eval_runtime": 8.3165, "eval_samples_per_second": 60.122, "eval_steps_per_second": 7.575, "step": 5706 }, { "epoch": 6.19, "learning_rate": 8.567469622681732e-06, "loss": 1.3457, "step": 5888 }, { "epoch": 6.46, "learning_rate": 8.499253890428481e-06, "loss": 1.2753, "step": 6144 }, { "epoch": 6.73, "learning_rate": 8.431038158175229e-06, "loss": 1.2898, "step": 6400 }, { "epoch": 7.0, "learning_rate": 8.362822425921979e-06, "loss": 1.2848, "step": 6656 }, { "epoch": 7.0, "eval_loss": 1.2965139150619507, "eval_runtime": 8.2552, "eval_samples_per_second": 60.568, "eval_steps_per_second": 7.632, "step": 6657 }, { "epoch": 7.27, "learning_rate": 8.294606693668728e-06, "loss": 1.272, "step": 6912 }, { "epoch": 7.54, "learning_rate": 8.226390961415478e-06, "loss": 1.2554, "step": 7168 }, { "epoch": 7.81, "learning_rate": 8.158175229162227e-06, "loss": 1.2676, "step": 7424 }, { "epoch": 8.0, "eval_loss": 1.2928757667541504, "eval_runtime": 8.2765, "eval_samples_per_second": 60.412, "eval_steps_per_second": 7.612, "step": 7608 }, { "epoch": 8.08, "learning_rate": 8.089959496908975e-06, "loss": 1.305, "step": 7680 }, { "epoch": 8.34, "learning_rate": 8.021743764655724e-06, "loss": 1.2323, "step": 7936 }, { "epoch": 8.61, "learning_rate": 7.953528032402474e-06, "loss": 1.2779, "step": 8192 }, { "epoch": 8.88, "learning_rate": 7.885312300149223e-06, "loss": 1.2661, "step": 8448 }, { "epoch": 9.0, "eval_loss": 1.2911019325256348, "eval_runtime": 8.2542, "eval_samples_per_second": 60.575, "eval_steps_per_second": 7.632, "step": 8559 }, { "epoch": 9.15, "learning_rate": 7.817096567895973e-06, "loss": 1.2381, "step": 8704 }, { "epoch": 9.42, "learning_rate": 7.74888083564272e-06, "loss": 1.2363, "step": 8960 }, { "epoch": 9.69, "learning_rate": 7.68066510338947e-06, "loss": 1.2129, "step": 9216 }, { "epoch": 9.96, "learning_rate": 7.612449371136219e-06, "loss": 1.232, "step": 9472 }, { "epoch": 10.0, "eval_loss": 1.288682460784912, "eval_runtime": 8.2411, "eval_samples_per_second": 60.671, "eval_steps_per_second": 7.645, "step": 9510 }, { "epoch": 10.23, "learning_rate": 7.544233638882968e-06, "loss": 1.2191, "step": 9728 }, { "epoch": 10.5, "learning_rate": 7.476017906629717e-06, "loss": 1.2438, "step": 9984 }, { "epoch": 10.77, "learning_rate": 7.4078021743764664e-06, "loss": 1.1995, "step": 10240 }, { "epoch": 11.0, "eval_loss": 1.2871848344802856, "eval_runtime": 8.2533, "eval_samples_per_second": 60.582, "eval_steps_per_second": 7.633, "step": 10461 }, { "epoch": 11.04, "learning_rate": 7.339586442123215e-06, "loss": 1.223, "step": 10496 }, { "epoch": 11.31, "learning_rate": 7.2713707098699646e-06, "loss": 1.1895, "step": 10752 }, { "epoch": 11.58, "learning_rate": 7.203154977616713e-06, "loss": 1.2133, "step": 11008 }, { "epoch": 11.84, "learning_rate": 7.134939245363463e-06, "loss": 1.1951, "step": 11264 }, { "epoch": 12.0, "eval_loss": 1.2869539260864258, "eval_runtime": 8.2328, "eval_samples_per_second": 60.733, "eval_steps_per_second": 7.652, "step": 11412 } ], "max_steps": 38040, "num_train_epochs": 40, "total_flos": 1.3185127263515443e+17, "trial_name": null, "trial_params": null }