{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.8656716417910446, "eval_steps": 2, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 1e-05, "loss": 0.9774, "step": 1 }, { "epoch": 0.24, "learning_rate": 9.953429730181653e-06, "loss": 0.9731, "step": 2 }, { "epoch": 0.24, "eval_loss": 0.9532256126403809, "eval_runtime": 45.7362, "eval_samples_per_second": 1.968, "eval_steps_per_second": 1.968, "step": 2 }, { "epoch": 0.36, "learning_rate": 9.814586436738998e-06, "loss": 0.9614, "step": 3 }, { "epoch": 0.48, "learning_rate": 9.586056507527266e-06, "loss": 0.999, "step": 4 }, { "epoch": 0.48, "eval_loss": 0.947267472743988, "eval_runtime": 45.4678, "eval_samples_per_second": 1.979, "eval_steps_per_second": 1.979, "step": 4 }, { "epoch": 0.6, "learning_rate": 9.272097022732444e-06, "loss": 0.9813, "step": 5 }, { "epoch": 0.72, "learning_rate": 8.8785564535221e-06, "loss": 0.9045, "step": 6 }, { "epoch": 0.72, "eval_loss": 0.9444782733917236, "eval_runtime": 45.6046, "eval_samples_per_second": 1.973, "eval_steps_per_second": 1.973, "step": 6 }, { "epoch": 0.84, "learning_rate": 8.412765716093273e-06, "loss": 0.9201, "step": 7 }, { "epoch": 0.96, "learning_rate": 7.883401610574338e-06, "loss": 0.9041, "step": 8 }, { "epoch": 0.96, "eval_loss": 0.9425725936889648, "eval_runtime": 45.4409, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 8 }, { "epoch": 1.07, "learning_rate": 7.300325188655762e-06, "loss": 0.9619, "step": 9 }, { "epoch": 1.19, "learning_rate": 6.674398060854931e-06, "loss": 0.9645, "step": 10 }, { "epoch": 1.19, "eval_loss": 0.9409478306770325, "eval_runtime": 45.4535, "eval_samples_per_second": 1.98, "eval_steps_per_second": 1.98, "step": 10 }, { "epoch": 1.31, "learning_rate": 6.0172800652631706e-06, "loss": 0.9642, "step": 11 }, { "epoch": 1.43, "learning_rate": 5.341212066823356e-06, "loss": 0.8638, "step": 12 }, { "epoch": 1.43, "eval_loss": 0.9403448104858398, "eval_runtime": 45.3553, "eval_samples_per_second": 1.984, "eval_steps_per_second": 1.984, "step": 12 }, { "epoch": 1.55, "learning_rate": 4.6587879331766465e-06, "loss": 0.9883, "step": 13 }, { "epoch": 1.67, "learning_rate": 3.982719934736832e-06, "loss": 0.8556, "step": 14 }, { "epoch": 1.67, "eval_loss": 0.9399924278259277, "eval_runtime": 45.4921, "eval_samples_per_second": 1.978, "eval_steps_per_second": 1.978, "step": 14 }, { "epoch": 1.79, "learning_rate": 3.3256019391450696e-06, "loss": 0.9509, "step": 15 }, { "epoch": 1.91, "learning_rate": 2.6996748113442397e-06, "loss": 0.8926, "step": 16 }, { "epoch": 1.91, "eval_loss": 0.9397569298744202, "eval_runtime": 45.3853, "eval_samples_per_second": 1.983, "eval_steps_per_second": 1.983, "step": 16 }, { "epoch": 2.03, "learning_rate": 2.1165983894256647e-06, "loss": 0.984, "step": 17 }, { "epoch": 2.15, "learning_rate": 1.5872342839067305e-06, "loss": 0.9494, "step": 18 }, { "epoch": 2.15, "eval_loss": 0.9394004940986633, "eval_runtime": 45.4356, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 18 }, { "epoch": 2.27, "learning_rate": 1.1214435464779006e-06, "loss": 0.9571, "step": 19 }, { "epoch": 2.39, "learning_rate": 7.279029772675572e-07, "loss": 0.9486, "step": 20 }, { "epoch": 2.39, "eval_loss": 0.9396945238113403, "eval_runtime": 45.5, "eval_samples_per_second": 1.978, "eval_steps_per_second": 1.978, "step": 20 }, { "epoch": 2.51, "learning_rate": 4.139434924727359e-07, "loss": 0.8492, "step": 21 }, { "epoch": 2.63, "learning_rate": 1.8541356326100436e-07, "loss": 0.9434, "step": 22 }, { "epoch": 2.63, "eval_loss": 0.9391700029373169, "eval_runtime": 45.3894, "eval_samples_per_second": 1.983, "eval_steps_per_second": 1.983, "step": 22 }, { "epoch": 2.75, "learning_rate": 4.657026981834623e-08, "loss": 0.9995, "step": 23 }, { "epoch": 2.87, "learning_rate": 0.0, "loss": 0.9475, "step": 24 }, { "epoch": 2.87, "eval_loss": 0.9397835731506348, "eval_runtime": 45.4245, "eval_samples_per_second": 1.981, "eval_steps_per_second": 1.981, "step": 24 }, { "epoch": 2.87, "step": 24, "total_flos": 6.417806722282291e+16, "train_loss": 0.9433953066666921, "train_runtime": 1841.1427, "train_samples_per_second": 0.647, "train_steps_per_second": 0.013 } ], "logging_steps": 1.0, "max_steps": 24, "num_train_epochs": 3, "save_steps": 3, "total_flos": 6.417806722282291e+16, "trial_name": null, "trial_params": null }