{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0638297872340425, "eval_steps": 3, "global_step": 36, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0851063829787234, "eval_loss": 3.537301540374756, "eval_runtime": 0.8155, "eval_samples_per_second": 24.526, "eval_steps_per_second": 3.679, "step": 1 }, { "epoch": 0.2553191489361702, "grad_norm": 4.053133964538574, "learning_rate": 3e-05, "loss": 3.9013, "step": 3 }, { "epoch": 0.2553191489361702, "eval_loss": 3.498924732208252, "eval_runtime": 0.4147, "eval_samples_per_second": 48.228, "eval_steps_per_second": 7.234, "step": 3 }, { "epoch": 0.5106382978723404, "grad_norm": 3.631814479827881, "learning_rate": 6e-05, "loss": 3.5773, "step": 6 }, { "epoch": 0.5106382978723404, "eval_loss": 3.226794481277466, "eval_runtime": 0.4126, "eval_samples_per_second": 48.473, "eval_steps_per_second": 7.271, "step": 6 }, { "epoch": 0.7659574468085106, "grad_norm": 3.4452314376831055, "learning_rate": 9e-05, "loss": 3.432, "step": 9 }, { "epoch": 0.7659574468085106, "eval_loss": 3.062047243118286, "eval_runtime": 0.416, "eval_samples_per_second": 48.077, "eval_steps_per_second": 7.211, "step": 9 }, { "epoch": 1.0212765957446808, "grad_norm": 4.869143962860107, "learning_rate": 9.85470908713026e-05, "loss": 3.4585, "step": 12 }, { "epoch": 1.0212765957446808, "eval_loss": 2.945756196975708, "eval_runtime": 0.4153, "eval_samples_per_second": 48.158, "eval_steps_per_second": 7.224, "step": 12 }, { "epoch": 1.2765957446808511, "grad_norm": 3.5793509483337402, "learning_rate": 9.114919329468282e-05, "loss": 2.8128, "step": 15 }, { "epoch": 1.2765957446808511, "eval_loss": 2.8750696182250977, "eval_runtime": 0.4152, "eval_samples_per_second": 48.165, "eval_steps_per_second": 7.225, "step": 15 }, { "epoch": 1.5319148936170213, "grad_norm": 3.459306001663208, "learning_rate": 7.840323733655778e-05, "loss": 2.6305, "step": 18 }, { "epoch": 1.5319148936170213, "eval_loss": 2.800814151763916, "eval_runtime": 0.4153, "eval_samples_per_second": 48.16, "eval_steps_per_second": 7.224, "step": 18 }, { "epoch": 1.7872340425531914, "grad_norm": 2.895676851272583, "learning_rate": 6.19657832143779e-05, "loss": 2.6033, "step": 21 }, { "epoch": 1.7872340425531914, "eval_loss": 2.7672295570373535, "eval_runtime": 0.4158, "eval_samples_per_second": 48.098, "eval_steps_per_second": 7.215, "step": 21 }, { "epoch": 2.0425531914893615, "grad_norm": 3.6542279720306396, "learning_rate": 4.397316598723385e-05, "loss": 2.8825, "step": 24 }, { "epoch": 2.0425531914893615, "eval_loss": 2.7445950508117676, "eval_runtime": 0.4188, "eval_samples_per_second": 47.757, "eval_steps_per_second": 7.164, "step": 24 }, { "epoch": 2.297872340425532, "grad_norm": 2.8489627838134766, "learning_rate": 2.6763841397811573e-05, "loss": 2.2496, "step": 27 }, { "epoch": 2.297872340425532, "eval_loss": 2.7331249713897705, "eval_runtime": 0.4223, "eval_samples_per_second": 47.356, "eval_steps_per_second": 7.103, "step": 27 }, { "epoch": 2.5531914893617023, "grad_norm": 2.887648820877075, "learning_rate": 1.257446259144494e-05, "loss": 2.4327, "step": 30 }, { "epoch": 2.5531914893617023, "eval_loss": 2.729001998901367, "eval_runtime": 0.4168, "eval_samples_per_second": 47.983, "eval_steps_per_second": 7.197, "step": 30 }, { "epoch": 2.8085106382978724, "grad_norm": 2.3609039783477783, "learning_rate": 3.249187865729264e-06, "loss": 2.3787, "step": 33 }, { "epoch": 2.8085106382978724, "eval_loss": 2.7300968170166016, "eval_runtime": 0.4182, "eval_samples_per_second": 47.827, "eval_steps_per_second": 7.174, "step": 33 }, { "epoch": 3.0638297872340425, "grad_norm": 2.7310924530029297, "learning_rate": 0.0, "loss": 2.3388, "step": 36 }, { "epoch": 3.0638297872340425, "eval_loss": 2.7303249835968018, "eval_runtime": 0.4199, "eval_samples_per_second": 47.631, "eval_steps_per_second": 7.145, "step": 36 } ], "logging_steps": 3, "max_steps": 36, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7206952438333440.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }