{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8771929824561403, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017543859649122806, "eval_loss": 10.366873741149902, "eval_runtime": 0.1727, "eval_samples_per_second": 555.877, "eval_steps_per_second": 69.485, "step": 1 }, { "epoch": 0.05263157894736842, "grad_norm": 0.07264672964811325, "learning_rate": 3e-05, "loss": 10.3711, "step": 3 }, { "epoch": 0.08771929824561403, "eval_loss": 10.366547584533691, "eval_runtime": 0.1674, "eval_samples_per_second": 573.519, "eval_steps_per_second": 71.69, "step": 5 }, { "epoch": 0.10526315789473684, "grad_norm": 0.08514741063117981, "learning_rate": 6e-05, "loss": 10.3717, "step": 6 }, { "epoch": 0.15789473684210525, "grad_norm": 0.08602377027273178, "learning_rate": 9e-05, "loss": 10.3709, "step": 9 }, { "epoch": 0.17543859649122806, "eval_loss": 10.365401268005371, "eval_runtime": 0.1659, "eval_samples_per_second": 578.602, "eval_steps_per_second": 72.325, "step": 10 }, { "epoch": 0.21052631578947367, "grad_norm": 0.07277622818946838, "learning_rate": 9.938441702975689e-05, "loss": 10.3704, "step": 12 }, { "epoch": 0.2631578947368421, "grad_norm": 0.07048846036195755, "learning_rate": 9.619397662556435e-05, "loss": 10.3696, "step": 15 }, { "epoch": 0.2631578947368421, "eval_loss": 10.363677024841309, "eval_runtime": 0.1716, "eval_samples_per_second": 559.366, "eval_steps_per_second": 69.921, "step": 15 }, { "epoch": 0.3157894736842105, "grad_norm": 0.09568169713020325, "learning_rate": 9.045084971874738e-05, "loss": 10.3689, "step": 18 }, { "epoch": 0.3508771929824561, "eval_loss": 10.36184310913086, "eval_runtime": 0.1886, "eval_samples_per_second": 508.996, "eval_steps_per_second": 63.625, "step": 20 }, { "epoch": 0.3684210526315789, "grad_norm": 0.08425775915384293, "learning_rate": 8.247240241650918e-05, "loss": 10.3657, "step": 21 }, { "epoch": 0.42105263157894735, "grad_norm": 0.11433319002389908, "learning_rate": 7.269952498697734e-05, "loss": 10.3645, "step": 24 }, { "epoch": 0.43859649122807015, "eval_loss": 10.35986328125, "eval_runtime": 0.1662, "eval_samples_per_second": 577.559, "eval_steps_per_second": 72.195, "step": 25 }, { "epoch": 0.47368421052631576, "grad_norm": 0.1078442707657814, "learning_rate": 6.167226819279528e-05, "loss": 10.3641, "step": 27 }, { "epoch": 0.5263157894736842, "grad_norm": 0.11035740375518799, "learning_rate": 5e-05, "loss": 10.3617, "step": 30 }, { "epoch": 0.5263157894736842, "eval_loss": 10.358070373535156, "eval_runtime": 0.1694, "eval_samples_per_second": 566.668, "eval_steps_per_second": 70.833, "step": 30 }, { "epoch": 0.5789473684210527, "grad_norm": 0.158222496509552, "learning_rate": 3.832773180720475e-05, "loss": 10.3614, "step": 33 }, { "epoch": 0.6140350877192983, "eval_loss": 10.356663703918457, "eval_runtime": 0.1699, "eval_samples_per_second": 565.089, "eval_steps_per_second": 70.636, "step": 35 }, { "epoch": 0.631578947368421, "grad_norm": 0.13146649301052094, "learning_rate": 2.7300475013022663e-05, "loss": 10.3615, "step": 36 }, { "epoch": 0.6842105263157895, "grad_norm": 0.12355206906795502, "learning_rate": 1.7527597583490822e-05, "loss": 10.3609, "step": 39 }, { "epoch": 0.7017543859649122, "eval_loss": 10.355828285217285, "eval_runtime": 0.1665, "eval_samples_per_second": 576.456, "eval_steps_per_second": 72.057, "step": 40 }, { "epoch": 0.7368421052631579, "grad_norm": 0.13545456528663635, "learning_rate": 9.549150281252633e-06, "loss": 10.3617, "step": 42 }, { "epoch": 0.7894736842105263, "grad_norm": 0.1202831044793129, "learning_rate": 3.8060233744356633e-06, "loss": 10.3582, "step": 45 }, { "epoch": 0.7894736842105263, "eval_loss": 10.355480194091797, "eval_runtime": 0.1709, "eval_samples_per_second": 561.655, "eval_steps_per_second": 70.207, "step": 45 }, { "epoch": 0.8421052631578947, "grad_norm": 0.1292213350534439, "learning_rate": 6.15582970243117e-07, "loss": 10.3596, "step": 48 }, { "epoch": 0.8771929824561403, "eval_loss": 10.355441093444824, "eval_runtime": 0.1692, "eval_samples_per_second": 567.394, "eval_steps_per_second": 70.924, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5577533030400.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }