{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 8240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12135922330097088, "grad_norm": 1.147416591644287, "learning_rate": 9.696601941747573e-05, "loss": 1.1102, "step": 250 }, { "epoch": 0.24271844660194175, "grad_norm": 1.0398495197296143, "learning_rate": 9.393203883495146e-05, "loss": 0.5891, "step": 500 }, { "epoch": 0.3640776699029126, "grad_norm": 0.7667867541313171, "learning_rate": 9.089805825242718e-05, "loss": 0.5176, "step": 750 }, { "epoch": 0.4854368932038835, "grad_norm": 0.7023970484733582, "learning_rate": 8.786407766990292e-05, "loss": 0.4917, "step": 1000 }, { "epoch": 0.6067961165048543, "grad_norm": 0.6781546473503113, "learning_rate": 8.483009708737865e-05, "loss": 0.4822, "step": 1250 }, { "epoch": 0.7281553398058253, "grad_norm": 0.7623434066772461, "learning_rate": 8.179611650485438e-05, "loss": 0.4655, "step": 1500 }, { "epoch": 0.8495145631067961, "grad_norm": 0.8142368793487549, "learning_rate": 7.87621359223301e-05, "loss": 0.45, "step": 1750 }, { "epoch": 0.970873786407767, "grad_norm": 0.8743399381637573, "learning_rate": 7.572815533980583e-05, "loss": 0.4304, "step": 2000 }, { "epoch": 1.0922330097087378, "grad_norm": 0.9348050355911255, "learning_rate": 7.269417475728155e-05, "loss": 0.3979, "step": 2250 }, { "epoch": 1.2135922330097086, "grad_norm": 1.0624396800994873, "learning_rate": 6.966019417475728e-05, "loss": 0.3695, "step": 2500 }, { "epoch": 1.3349514563106797, "grad_norm": 1.0327861309051514, "learning_rate": 6.662621359223301e-05, "loss": 0.3452, "step": 2750 }, { "epoch": 1.4563106796116505, "grad_norm": 1.0362504720687866, "learning_rate": 6.359223300970875e-05, "loss": 0.3194, "step": 3000 }, { "epoch": 1.5776699029126213, "grad_norm": 1.1497058868408203, "learning_rate": 6.055825242718447e-05, "loss": 0.3, "step": 3250 }, { "epoch": 1.6990291262135924, "grad_norm": 1.1127945184707642, "learning_rate": 5.752427184466019e-05, "loss": 0.2829, "step": 3500 }, { "epoch": 1.820388349514563, "grad_norm": 1.0874605178833008, "learning_rate": 5.4490291262135926e-05, "loss": 0.2714, "step": 3750 }, { "epoch": 1.941747572815534, "grad_norm": 0.965722382068634, "learning_rate": 5.145631067961165e-05, "loss": 0.2596, "step": 4000 }, { "epoch": 2.063106796116505, "grad_norm": 1.0977766513824463, "learning_rate": 4.8422330097087385e-05, "loss": 0.2352, "step": 4250 }, { "epoch": 2.1844660194174756, "grad_norm": 1.022316813468933, "learning_rate": 4.538834951456311e-05, "loss": 0.2209, "step": 4500 }, { "epoch": 2.3058252427184467, "grad_norm": 0.982537031173706, "learning_rate": 4.235436893203884e-05, "loss": 0.2171, "step": 4750 }, { "epoch": 2.4271844660194173, "grad_norm": 1.093375563621521, "learning_rate": 3.9320388349514564e-05, "loss": 0.2129, "step": 5000 }, { "epoch": 2.5485436893203883, "grad_norm": 0.9269146919250488, "learning_rate": 3.62864077669903e-05, "loss": 0.2088, "step": 5250 }, { "epoch": 2.6699029126213594, "grad_norm": 1.0317578315734863, "learning_rate": 3.325242718446602e-05, "loss": 0.2032, "step": 5500 }, { "epoch": 2.79126213592233, "grad_norm": 0.9504765868186951, "learning_rate": 3.0218446601941746e-05, "loss": 0.2013, "step": 5750 }, { "epoch": 2.912621359223301, "grad_norm": 0.9305168390274048, "learning_rate": 2.7184466019417475e-05, "loss": 0.1963, "step": 6000 }, { "epoch": 3.033980582524272, "grad_norm": 0.830916166305542, "learning_rate": 2.4150485436893205e-05, "loss": 0.1879, "step": 6250 }, { "epoch": 3.1553398058252426, "grad_norm": 0.8673647046089172, "learning_rate": 2.111650485436893e-05, "loss": 0.1753, "step": 6500 }, { "epoch": 3.2766990291262137, "grad_norm": 0.8825583457946777, "learning_rate": 1.808252427184466e-05, "loss": 0.1739, "step": 6750 }, { "epoch": 3.3980582524271843, "grad_norm": 1.0287333726882935, "learning_rate": 1.5048543689320387e-05, "loss": 0.1729, "step": 7000 }, { "epoch": 3.5194174757281553, "grad_norm": 0.809319794178009, "learning_rate": 1.2014563106796117e-05, "loss": 0.1718, "step": 7250 }, { "epoch": 3.6407766990291264, "grad_norm": 0.8941086530685425, "learning_rate": 8.980582524271845e-06, "loss": 0.1702, "step": 7500 }, { "epoch": 3.762135922330097, "grad_norm": 0.9008864760398865, "learning_rate": 5.946601941747574e-06, "loss": 0.1688, "step": 7750 }, { "epoch": 3.883495145631068, "grad_norm": 0.8730018138885498, "learning_rate": 2.912621359223301e-06, "loss": 0.1689, "step": 8000 }, { "epoch": 4.0, "step": 8240, "total_flos": 2.943708398017659e+18, "train_loss": 0.31335733145186045, "train_runtime": 45318.0558, "train_samples_per_second": 11.637, "train_steps_per_second": 0.182 } ], "logging_steps": 250, "max_steps": 8240, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.943708398017659e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }