{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.027900488258544524, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009300162752848175, "eval_loss": 2.124025344848633, "eval_runtime": 88.5751, "eval_samples_per_second": 5.114, "eval_steps_per_second": 2.563, "step": 1 }, { "epoch": 0.0027900488258544524, "grad_norm": 5.161037921905518, "learning_rate": 6e-05, "loss": 2.0161, "step": 3 }, { "epoch": 0.005580097651708905, "grad_norm": 1.55072820186615, "learning_rate": 0.00012, "loss": 1.7504, "step": 6 }, { "epoch": 0.00744013020227854, "eval_loss": 1.6678786277770996, "eval_runtime": 89.6936, "eval_samples_per_second": 5.051, "eval_steps_per_second": 2.531, "step": 8 }, { "epoch": 0.008370146477563357, "grad_norm": 1.5047965049743652, "learning_rate": 0.00018, "loss": 1.671, "step": 9 }, { "epoch": 0.01116019530341781, "grad_norm": 1.5108683109283447, "learning_rate": 0.00019510565162951537, "loss": 1.5883, "step": 12 }, { "epoch": 0.013950244129272262, "grad_norm": 1.0921416282653809, "learning_rate": 0.00017071067811865476, "loss": 1.5418, "step": 15 }, { "epoch": 0.01488026040455708, "eval_loss": 1.5838655233383179, "eval_runtime": 89.5798, "eval_samples_per_second": 5.057, "eval_steps_per_second": 2.534, "step": 16 }, { "epoch": 0.016740292955126714, "grad_norm": 1.2232215404510498, "learning_rate": 0.00013090169943749476, "loss": 1.5445, "step": 18 }, { "epoch": 0.01953034178098117, "grad_norm": 1.3080086708068848, "learning_rate": 8.435655349597689e-05, "loss": 1.6126, "step": 21 }, { "epoch": 0.02232039060683562, "grad_norm": 1.3865599632263184, "learning_rate": 4.12214747707527e-05, "loss": 1.604, "step": 24 }, { "epoch": 0.02232039060683562, "eval_loss": 1.5507044792175293, "eval_runtime": 89.9086, "eval_samples_per_second": 5.038, "eval_steps_per_second": 2.525, "step": 24 }, { "epoch": 0.025110439432690073, "grad_norm": 0.9281854033470154, "learning_rate": 1.0899347581163221e-05, "loss": 1.4299, "step": 27 }, { "epoch": 0.027900488258544524, "grad_norm": 0.8828859925270081, "learning_rate": 0.0, "loss": 1.4655, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.235418813038592e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }