{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.909090909090909, "eval_steps": 2, "global_step": 16, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18181818181818182, "grad_norm": 0.0655059739947319, "learning_rate": 1e-05, "loss": 10.3814, "step": 1 }, { "epoch": 0.18181818181818182, "eval_loss": 10.373859405517578, "eval_runtime": 0.0149, "eval_samples_per_second": 1210.594, "eval_steps_per_second": 67.255, "step": 1 }, { "epoch": 0.36363636363636365, "grad_norm": 0.06763208657503128, "learning_rate": 2e-05, "loss": 10.3794, "step": 2 }, { "epoch": 0.36363636363636365, "eval_loss": 10.37384033203125, "eval_runtime": 0.014, "eval_samples_per_second": 1286.509, "eval_steps_per_second": 71.473, "step": 2 }, { "epoch": 0.5454545454545454, "grad_norm": 0.05924370139837265, "learning_rate": 3e-05, "loss": 10.3799, "step": 3 }, { "epoch": 0.7272727272727273, "grad_norm": 0.07271450757980347, "learning_rate": 4e-05, "loss": 10.3762, "step": 4 }, { "epoch": 0.7272727272727273, "eval_loss": 10.373745918273926, "eval_runtime": 0.0137, "eval_samples_per_second": 1310.47, "eval_steps_per_second": 72.804, "step": 4 }, { "epoch": 0.9090909090909091, "grad_norm": 0.07363289594650269, "learning_rate": 5e-05, "loss": 10.3796, "step": 5 }, { "epoch": 1.0909090909090908, "grad_norm": 0.09829460829496384, "learning_rate": 6e-05, "loss": 15.5502, "step": 6 }, { "epoch": 1.0909090909090908, "eval_loss": 10.373668670654297, "eval_runtime": 0.0138, "eval_samples_per_second": 1302.219, "eval_steps_per_second": 72.346, "step": 6 }, { "epoch": 1.2727272727272727, "grad_norm": 0.0690544992685318, "learning_rate": 7e-05, "loss": 10.4244, "step": 7 }, { "epoch": 1.4545454545454546, "grad_norm": 0.0672140046954155, "learning_rate": 8e-05, "loss": 10.3356, "step": 8 }, { "epoch": 1.4545454545454546, "eval_loss": 10.373486518859863, "eval_runtime": 0.0138, "eval_samples_per_second": 1308.993, "eval_steps_per_second": 72.722, "step": 8 }, { "epoch": 1.6363636363636362, "grad_norm": 0.06882895529270172, "learning_rate": 9e-05, "loss": 10.3282, "step": 9 }, { "epoch": 1.8181818181818183, "grad_norm": 0.07141602784395218, "learning_rate": 0.0001, "loss": 10.411, "step": 10 }, { "epoch": 1.8181818181818183, "eval_loss": 10.373241424560547, "eval_runtime": 0.0139, "eval_samples_per_second": 1298.613, "eval_steps_per_second": 72.145, "step": 10 }, { "epoch": 2.0, "grad_norm": 0.10249509662389755, "learning_rate": 9.330127018922194e-05, "loss": 15.5975, "step": 11 }, { "epoch": 2.1818181818181817, "grad_norm": 0.07137065380811691, "learning_rate": 7.500000000000001e-05, "loss": 10.3779, "step": 12 }, { "epoch": 2.1818181818181817, "eval_loss": 10.373038291931152, "eval_runtime": 0.0138, "eval_samples_per_second": 1305.236, "eval_steps_per_second": 72.513, "step": 12 }, { "epoch": 2.3636363636363638, "grad_norm": 0.06905053555965424, "learning_rate": 5e-05, "loss": 10.3807, "step": 13 }, { "epoch": 2.5454545454545454, "grad_norm": 0.0683932676911354, "learning_rate": 2.500000000000001e-05, "loss": 10.3721, "step": 14 }, { "epoch": 2.5454545454545454, "eval_loss": 10.372879028320312, "eval_runtime": 0.0138, "eval_samples_per_second": 1305.011, "eval_steps_per_second": 72.501, "step": 14 }, { "epoch": 2.7272727272727275, "grad_norm": 0.07640717178583145, "learning_rate": 6.698729810778065e-06, "loss": 10.3795, "step": 15 }, { "epoch": 2.909090909090909, "grad_norm": 0.06950083374977112, "learning_rate": 0.0, "loss": 10.3801, "step": 16 }, { "epoch": 2.909090909090909, "eval_loss": 10.372831344604492, "eval_runtime": 0.0137, "eval_samples_per_second": 1318.549, "eval_steps_per_second": 73.253, "step": 16 } ], "logging_steps": 1, "max_steps": 16, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3347356581888.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }