{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09017132551848513, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018034265103697023, "eval_loss": 1.5323797464370728, "eval_runtime": 15.1643, "eval_samples_per_second": 15.431, "eval_steps_per_second": 7.715, "step": 1 }, { "epoch": 0.009017132551848512, "grad_norm": 8.76136302947998, "learning_rate": 5e-05, "loss": 6.1682, "step": 5 }, { "epoch": 0.018034265103697024, "grad_norm": 8.025477409362793, "learning_rate": 0.0001, "loss": 5.5289, "step": 10 }, { "epoch": 0.018034265103697024, "eval_loss": 1.2459672689437866, "eval_runtime": 15.635, "eval_samples_per_second": 14.966, "eval_steps_per_second": 7.483, "step": 10 }, { "epoch": 0.027051397655545536, "grad_norm": 6.852028846740723, "learning_rate": 9.619397662556435e-05, "loss": 4.7056, "step": 15 }, { "epoch": 0.03606853020739405, "grad_norm": 5.969870090484619, "learning_rate": 8.535533905932738e-05, "loss": 4.1946, "step": 20 }, { "epoch": 0.03606853020739405, "eval_loss": 1.0455502271652222, "eval_runtime": 15.6064, "eval_samples_per_second": 14.994, "eval_steps_per_second": 7.497, "step": 20 }, { "epoch": 0.04508566275924256, "grad_norm": 6.2113494873046875, "learning_rate": 6.91341716182545e-05, "loss": 4.0385, "step": 25 }, { "epoch": 0.05410279531109107, "grad_norm": 5.920165538787842, "learning_rate": 5e-05, "loss": 4.0129, "step": 30 }, { "epoch": 0.05410279531109107, "eval_loss": 0.9842101335525513, "eval_runtime": 15.6988, "eval_samples_per_second": 14.906, "eval_steps_per_second": 7.453, "step": 30 }, { "epoch": 0.06311992786293959, "grad_norm": 6.285253524780273, "learning_rate": 3.086582838174551e-05, "loss": 3.9661, "step": 35 }, { "epoch": 0.0721370604147881, "grad_norm": 5.879969120025635, "learning_rate": 1.4644660940672627e-05, "loss": 3.7932, "step": 40 }, { "epoch": 0.0721370604147881, "eval_loss": 0.959617555141449, "eval_runtime": 15.7241, "eval_samples_per_second": 14.882, "eval_steps_per_second": 7.441, "step": 40 }, { "epoch": 0.0811541929666366, "grad_norm": 6.4072957038879395, "learning_rate": 3.8060233744356633e-06, "loss": 3.7872, "step": 45 }, { "epoch": 0.09017132551848513, "grad_norm": 6.380867958068848, "learning_rate": 0.0, "loss": 3.6417, "step": 50 }, { "epoch": 0.09017132551848513, "eval_loss": 0.9541131258010864, "eval_runtime": 15.5353, "eval_samples_per_second": 15.062, "eval_steps_per_second": 7.531, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8894809381011456.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }