{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 3171, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0946073793755913, "grad_norm": 1.295289158821106, "learning_rate": 0.0009700409965310628, "loss": 2.3794, "step": 100 }, { "epoch": 0.1892147587511826, "grad_norm": 0.7961627244949341, "learning_rate": 0.0009385052034058657, "loss": 0.5368, "step": 200 }, { "epoch": 0.28382213812677387, "grad_norm": 0.8890175223350525, "learning_rate": 0.0009069694102806686, "loss": 0.3488, "step": 300 }, { "epoch": 0.3784295175023652, "grad_norm": 0.5166388154029846, "learning_rate": 0.0008754336171554715, "loss": 0.2712, "step": 400 }, { "epoch": 0.47303689687795647, "grad_norm": 0.3027492165565491, "learning_rate": 0.0008438978240302744, "loss": 0.2219, "step": 500 }, { "epoch": 0.5676442762535477, "grad_norm": 0.8666072487831116, "learning_rate": 0.0008123620309050773, "loss": 0.1906, "step": 600 }, { "epoch": 0.6622516556291391, "grad_norm": 0.5293228030204773, "learning_rate": 0.0007808262377798802, "loss": 0.17, "step": 700 }, { "epoch": 0.7568590350047304, "grad_norm": 0.33755800127983093, "learning_rate": 0.0007492904446546831, "loss": 0.1506, "step": 800 }, { "epoch": 0.8514664143803217, "grad_norm": 0.3760841190814972, "learning_rate": 0.000717754651529486, "loss": 0.142, "step": 900 }, { "epoch": 0.9460737937559129, "grad_norm": 0.3145512044429779, "learning_rate": 0.0006862188584042889, "loss": 0.1256, "step": 1000 }, { "epoch": 1.0406811731315042, "grad_norm": 0.47477108240127563, "learning_rate": 0.0006546830652790918, "loss": 0.1225, "step": 1100 }, { "epoch": 1.1352885525070955, "grad_norm": 0.3582330644130707, "learning_rate": 0.0006231472721538947, "loss": 0.1148, "step": 1200 }, { "epoch": 1.2298959318826868, "grad_norm": 0.4500308036804199, "learning_rate": 0.0005916114790286976, "loss": 0.1148, "step": 1300 }, { "epoch": 1.3245033112582782, "grad_norm": 0.2415657341480255, "learning_rate": 0.0005600756859035005, "loss": 0.0934, "step": 1400 }, { "epoch": 1.4191106906338695, "grad_norm": 0.49272701144218445, "learning_rate": 0.0005285398927783034, "loss": 0.098, "step": 1500 }, { "epoch": 1.5137180700094608, "grad_norm": 0.28604432940483093, "learning_rate": 0.0004970040996531063, "loss": 0.1056, "step": 1600 }, { "epoch": 1.608325449385052, "grad_norm": 0.4883616864681244, "learning_rate": 0.00046546830652790914, "loss": 0.0992, "step": 1700 }, { "epoch": 1.7029328287606433, "grad_norm": 0.42010796070098877, "learning_rate": 0.0004339325134027121, "loss": 0.0879, "step": 1800 }, { "epoch": 1.7975402081362346, "grad_norm": 0.23443974554538727, "learning_rate": 0.000402396720277515, "loss": 0.0906, "step": 1900 }, { "epoch": 1.8921475875118259, "grad_norm": 0.6164644956588745, "learning_rate": 0.0003708609271523179, "loss": 0.0834, "step": 2000 }, { "epoch": 1.9867549668874172, "grad_norm": 0.36332041025161743, "learning_rate": 0.0003393251340271208, "loss": 0.0798, "step": 2100 }, { "epoch": 2.0813623462630084, "grad_norm": 0.3371862769126892, "learning_rate": 0.00030778934090192365, "loss": 0.0893, "step": 2200 }, { "epoch": 2.1759697256385997, "grad_norm": 0.2492402046918869, "learning_rate": 0.00027625354777672655, "loss": 0.0768, "step": 2300 }, { "epoch": 2.270577105014191, "grad_norm": 0.34287121891975403, "learning_rate": 0.00024503311258278145, "loss": 0.0776, "step": 2400 }, { "epoch": 2.3651844843897822, "grad_norm": 0.4034591615200043, "learning_rate": 0.00021349731945758435, "loss": 0.0717, "step": 2500 }, { "epoch": 2.4597918637653735, "grad_norm": 0.3876320719718933, "learning_rate": 0.00018196152633238728, "loss": 0.0762, "step": 2600 }, { "epoch": 2.5543992431409652, "grad_norm": 0.16697722673416138, "learning_rate": 0.00015042573320719015, "loss": 0.0688, "step": 2700 }, { "epoch": 2.6490066225165565, "grad_norm": 0.32368209958076477, "learning_rate": 0.00011888994008199306, "loss": 0.0728, "step": 2800 }, { "epoch": 2.7436140018921478, "grad_norm": 0.4008019268512726, "learning_rate": 8.735414695679597e-05, "loss": 0.0756, "step": 2900 }, { "epoch": 2.838221381267739, "grad_norm": 0.25073108077049255, "learning_rate": 5.581835383159887e-05, "loss": 0.0766, "step": 3000 }, { "epoch": 2.9328287606433303, "grad_norm": 0.3861249089241028, "learning_rate": 2.4282560706401765e-05, "loss": 0.0717, "step": 3100 } ], "logging_steps": 100, "max_steps": 3171, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 202375839744000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }