{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7692307692307693, "eval_steps": 9, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03076923076923077, "grad_norm": 0.19627095758914948, "learning_rate": 1e-05, "loss": 10.3769, "step": 1 }, { "epoch": 0.03076923076923077, "eval_loss": 10.386632919311523, "eval_runtime": 0.0801, "eval_samples_per_second": 1361.483, "eval_steps_per_second": 49.963, "step": 1 }, { "epoch": 0.06153846153846154, "grad_norm": 0.20535489916801453, "learning_rate": 2e-05, "loss": 10.3764, "step": 2 }, { "epoch": 0.09230769230769231, "grad_norm": 0.1900486946105957, "learning_rate": 3e-05, "loss": 10.382, "step": 3 }, { "epoch": 0.12307692307692308, "grad_norm": 0.2189124971628189, "learning_rate": 4e-05, "loss": 10.3854, "step": 4 }, { "epoch": 0.15384615384615385, "grad_norm": 0.19613224267959595, "learning_rate": 5e-05, "loss": 10.3846, "step": 5 }, { "epoch": 0.18461538461538463, "grad_norm": 0.21051953732967377, "learning_rate": 6e-05, "loss": 10.3951, "step": 6 }, { "epoch": 0.2153846153846154, "grad_norm": 0.193317711353302, "learning_rate": 7e-05, "loss": 10.4099, "step": 7 }, { "epoch": 0.24615384615384617, "grad_norm": 0.22925445437431335, "learning_rate": 8e-05, "loss": 10.4001, "step": 8 }, { "epoch": 0.27692307692307694, "grad_norm": 0.2118426263332367, "learning_rate": 9e-05, "loss": 10.4226, "step": 9 }, { "epoch": 0.27692307692307694, "eval_loss": 10.384289741516113, "eval_runtime": 0.075, "eval_samples_per_second": 1452.898, "eval_steps_per_second": 53.317, "step": 9 }, { "epoch": 0.3076923076923077, "grad_norm": 0.22410708665847778, "learning_rate": 0.0001, "loss": 10.3974, "step": 10 }, { "epoch": 0.3384615384615385, "grad_norm": 0.28219085931777954, "learning_rate": 9.996740476948385e-05, "loss": 10.3593, "step": 11 }, { "epoch": 0.36923076923076925, "grad_norm": 0.2661738991737366, "learning_rate": 9.98696615758975e-05, "loss": 10.3826, "step": 12 }, { "epoch": 0.4, "grad_norm": 0.26590314507484436, "learning_rate": 9.970689785771798e-05, "loss": 10.3853, "step": 13 }, { "epoch": 0.4307692307692308, "grad_norm": 0.23882359266281128, "learning_rate": 9.947932582778188e-05, "loss": 10.3944, "step": 14 }, { "epoch": 0.46153846153846156, "grad_norm": 0.2391405999660492, "learning_rate": 9.918724219660013e-05, "loss": 10.357, "step": 15 }, { "epoch": 0.49230769230769234, "grad_norm": 0.2403474599123001, "learning_rate": 9.883102778550434e-05, "loss": 10.3803, "step": 16 }, { "epoch": 0.5230769230769231, "grad_norm": 0.22196514904499054, "learning_rate": 9.841114703012817e-05, "loss": 10.3643, "step": 17 }, { "epoch": 0.5538461538461539, "grad_norm": 0.245796337723732, "learning_rate": 9.792814737487207e-05, "loss": 10.4181, "step": 18 }, { "epoch": 0.5538461538461539, "eval_loss": 10.378185272216797, "eval_runtime": 0.0762, "eval_samples_per_second": 1429.641, "eval_steps_per_second": 52.464, "step": 18 }, { "epoch": 0.5846153846153846, "grad_norm": 0.2934595048427582, "learning_rate": 9.738265855914013e-05, "loss": 10.3524, "step": 19 }, { "epoch": 0.6153846153846154, "grad_norm": 0.29529669880867004, "learning_rate": 9.677539179628005e-05, "loss": 10.3852, "step": 20 }, { "epoch": 0.6461538461538462, "grad_norm": 0.2764834761619568, "learning_rate": 9.610713884629666e-05, "loss": 10.3627, "step": 21 }, { "epoch": 0.676923076923077, "grad_norm": 0.270579993724823, "learning_rate": 9.537877098354786e-05, "loss": 10.3802, "step": 22 }, { 
"epoch": 0.7076923076923077, "grad_norm": 0.28736695647239685, "learning_rate": 9.459123786076912e-05, "loss": 10.3475, "step": 23 }, { "epoch": 0.7384615384615385, "grad_norm": 0.27069252729415894, "learning_rate": 9.374556627090749e-05, "loss": 10.3726, "step": 24 }, { "epoch": 0.7692307692307693, "grad_norm": 0.2778293192386627, "learning_rate": 9.284285880837946e-05, "loss": 10.3683, "step": 25 } ], "logging_steps": 1, "max_steps": 97, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5230244659200.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }