{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01264942128897603, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002529884257795206, "eval_loss": 8.741473197937012, "eval_runtime": 586.1352, "eval_samples_per_second": 11.359, "eval_steps_per_second": 1.421, "step": 1 }, { "epoch": 0.0007589652773385617, "grad_norm": 26.1434383392334, "learning_rate": 3e-05, "loss": 8.4933, "step": 3 }, { "epoch": 0.001264942128897603, "eval_loss": 5.998294830322266, "eval_runtime": 591.3249, "eval_samples_per_second": 11.259, "eval_steps_per_second": 1.409, "step": 5 }, { "epoch": 0.0015179305546771235, "grad_norm": 16.133859634399414, "learning_rate": 6e-05, "loss": 7.1644, "step": 6 }, { "epoch": 0.002276895832015685, "grad_norm": 13.156013488769531, "learning_rate": 9e-05, "loss": 5.3902, "step": 9 }, { "epoch": 0.002529884257795206, "eval_loss": 4.7294020652771, "eval_runtime": 591.5744, "eval_samples_per_second": 11.255, "eval_steps_per_second": 1.408, "step": 10 }, { "epoch": 0.003035861109354247, "grad_norm": 17.320507049560547, "learning_rate": 9.938441702975689e-05, "loss": 4.86, "step": 12 }, { "epoch": 0.0037948263866928087, "grad_norm": 11.7354736328125, "learning_rate": 9.619397662556435e-05, "loss": 4.0561, "step": 15 }, { "epoch": 0.0037948263866928087, "eval_loss": 4.001941204071045, "eval_runtime": 591.4343, "eval_samples_per_second": 11.257, "eval_steps_per_second": 1.408, "step": 15 }, { "epoch": 0.00455379166403137, "grad_norm": 10.361626625061035, "learning_rate": 9.045084971874738e-05, "loss": 4.1918, "step": 18 }, { "epoch": 0.005059768515590412, "eval_loss": 3.7069029808044434, "eval_runtime": 591.3495, "eval_samples_per_second": 11.259, "eval_steps_per_second": 1.409, "step": 20 }, { "epoch": 0.005312756941369933, "grad_norm": 10.757951736450195, "learning_rate": 8.247240241650918e-05, "loss": 3.6442, "step": 21 }, { "epoch": 0.006071722218708494, "grad_norm": 9.885283470153809, "learning_rate": 7.269952498697734e-05, "loss": 3.736, "step": 24 }, { "epoch": 0.006324710644488015, "eval_loss": 3.5459110736846924, "eval_runtime": 591.4792, "eval_samples_per_second": 11.257, "eval_steps_per_second": 1.408, "step": 25 }, { "epoch": 0.006830687496047056, "grad_norm": 13.528706550598145, "learning_rate": 6.167226819279528e-05, "loss": 3.6003, "step": 27 }, { "epoch": 0.007589652773385617, "grad_norm": 10.549182891845703, "learning_rate": 5e-05, "loss": 3.6271, "step": 30 }, { "epoch": 0.007589652773385617, "eval_loss": 3.423410415649414, "eval_runtime": 591.4513, "eval_samples_per_second": 11.257, "eval_steps_per_second": 1.408, "step": 30 }, { "epoch": 0.008348618050724179, "grad_norm": 10.609362602233887, "learning_rate": 3.832773180720475e-05, "loss": 3.0585, "step": 33 }, { "epoch": 0.00885459490228322, "eval_loss": 3.3249850273132324, "eval_runtime": 591.8226, "eval_samples_per_second": 11.25, "eval_steps_per_second": 1.408, "step": 35 }, { "epoch": 0.00910758332806274, "grad_norm": 9.735877990722656, "learning_rate": 2.7300475013022663e-05, "loss": 3.5678, "step": 36 }, { "epoch": 0.009866548605401303, "grad_norm": 9.53584098815918, "learning_rate": 1.7527597583490822e-05, "loss": 3.331, "step": 39 }, { "epoch": 0.010119537031180823, "eval_loss": 3.270254135131836, "eval_runtime": 591.7633, "eval_samples_per_second": 11.251, "eval_steps_per_second": 1.408, "step": 40 }, { "epoch": 0.010625513882739865, "grad_norm": 9.458514213562012, "learning_rate": 9.549150281252633e-06, "loss": 3.4146, "step": 42 }, { "epoch": 0.011384479160078426, "grad_norm": 8.75707721710205, "learning_rate": 3.8060233744356633e-06, "loss": 3.0878, "step": 45 }, { "epoch": 0.011384479160078426, "eval_loss": 3.243670701980591, "eval_runtime": 591.8535, "eval_samples_per_second": 11.249, "eval_steps_per_second": 1.407, "step": 45 }, { "epoch": 0.012143444437416988, "grad_norm": 9.111993789672852, "learning_rate": 6.15582970243117e-07, "loss": 3.0088, "step": 48 }, { "epoch": 0.01264942128897603, "eval_loss": 3.237314462661743, "eval_runtime": 592.0112, "eval_samples_per_second": 11.246, "eval_steps_per_second": 1.407, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.81641131753472e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }