{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1057, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0946073793755913, "grad_norm": 1.295289158821106, "learning_rate": 0.0009700409965310628, "loss": 2.3794, "step": 100 }, { "epoch": 0.1892147587511826, "grad_norm": 0.7961627244949341, "learning_rate": 0.0009385052034058657, "loss": 0.5368, "step": 200 }, { "epoch": 0.28382213812677387, "grad_norm": 0.8890175223350525, "learning_rate": 0.0009069694102806686, "loss": 0.3488, "step": 300 }, { "epoch": 0.3784295175023652, "grad_norm": 0.5166388154029846, "learning_rate": 0.0008754336171554715, "loss": 0.2712, "step": 400 }, { "epoch": 0.47303689687795647, "grad_norm": 0.3027492165565491, "learning_rate": 0.0008438978240302744, "loss": 0.2219, "step": 500 }, { "epoch": 0.5676442762535477, "grad_norm": 0.8666072487831116, "learning_rate": 0.0008123620309050773, "loss": 0.1906, "step": 600 }, { "epoch": 0.6622516556291391, "grad_norm": 0.5293228030204773, "learning_rate": 0.0007808262377798802, "loss": 0.17, "step": 700 }, { "epoch": 0.7568590350047304, "grad_norm": 0.33755800127983093, "learning_rate": 0.0007492904446546831, "loss": 0.1506, "step": 800 }, { "epoch": 0.8514664143803217, "grad_norm": 0.3760841190814972, "learning_rate": 0.000717754651529486, "loss": 0.142, "step": 900 }, { "epoch": 0.9460737937559129, "grad_norm": 0.3145512044429779, "learning_rate": 0.0006862188584042889, "loss": 0.1256, "step": 1000 } ], "logging_steps": 100, "max_steps": 3171, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 67458613248000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }