{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.31494079113126733, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 9.5e-05, "loss": 37.4397, "step": 500 }, { "epoch": 0.03, "eval_loss": 31.211898803710938, "eval_runtime": 6.944, "eval_samples_per_second": 14.401, "eval_steps_per_second": 3.6, "step": 500 }, { "epoch": 0.06, "learning_rate": 9e-05, "loss": 33.7011, "step": 1000 }, { "epoch": 0.06, "eval_loss": 30.628690719604492, "eval_runtime": 8.8362, "eval_samples_per_second": 11.317, "eval_steps_per_second": 2.829, "step": 1000 }, { "epoch": 0.09, "learning_rate": 8.5e-05, "loss": 34.3949, "step": 1500 }, { "epoch": 0.09, "eval_loss": 30.2351016998291, "eval_runtime": 9.7911, "eval_samples_per_second": 10.213, "eval_steps_per_second": 2.553, "step": 1500 }, { "epoch": 0.13, "learning_rate": 8e-05, "loss": 33.7534, "step": 2000 }, { "epoch": 0.13, "eval_loss": 30.151206970214844, "eval_runtime": 9.0893, "eval_samples_per_second": 11.002, "eval_steps_per_second": 2.75, "step": 2000 }, { "epoch": 0.16, "learning_rate": 7.500000000000001e-05, "loss": 33.9418, "step": 2500 }, { "epoch": 0.16, "eval_loss": 30.117929458618164, "eval_runtime": 7.9747, "eval_samples_per_second": 12.54, "eval_steps_per_second": 3.135, "step": 2500 }, { "epoch": 0.19, "learning_rate": 7e-05, "loss": 33.3573, "step": 3000 }, { "epoch": 0.19, "eval_loss": 29.591096878051758, "eval_runtime": 7.5459, "eval_samples_per_second": 13.252, "eval_steps_per_second": 3.313, "step": 3000 }, { "epoch": 0.22, "learning_rate": 6.500000000000001e-05, "loss": 33.3372, "step": 3500 }, { "epoch": 0.22, "eval_loss": 29.37584686279297, "eval_runtime": 9.1309, "eval_samples_per_second": 10.952, "eval_steps_per_second": 2.738, "step": 3500 }, { "epoch": 0.25, "learning_rate": 6e-05, "loss": 32.5095, "step": 4000 }, { "epoch": 0.25, "eval_loss": 29.21722412109375, "eval_runtime": 8.935, "eval_samples_per_second": 11.192, "eval_steps_per_second": 2.798, "step": 4000 }, { "epoch": 0.28, "learning_rate": 5.500000000000001e-05, "loss": 32.6192, "step": 4500 }, { "epoch": 0.28, "eval_loss": 28.85555648803711, "eval_runtime": 8.0464, "eval_samples_per_second": 12.428, "eval_steps_per_second": 3.107, "step": 4500 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 32.902, "step": 5000 }, { "epoch": 0.31, "eval_loss": 28.798521041870117, "eval_runtime": 8.3773, "eval_samples_per_second": 11.937, "eval_steps_per_second": 2.984, "step": 5000 } ], "logging_steps": 500, "max_steps": 10000, "num_train_epochs": 1, "save_steps": 5000, "total_flos": 6.84757352448e+16, "trial_name": null, "trial_params": null }