{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5970149253731343, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011940298507462687, "eval_loss": 1.5449435710906982, "eval_runtime": 33.4954, "eval_samples_per_second": 4.21, "eval_steps_per_second": 0.537, "step": 1 }, { "epoch": 0.03582089552238806, "grad_norm": 7.289577484130859, "learning_rate": 3e-05, "loss": 6.1357, "step": 3 }, { "epoch": 0.05970149253731343, "eval_loss": 1.4649710655212402, "eval_runtime": 33.7856, "eval_samples_per_second": 4.173, "eval_steps_per_second": 0.533, "step": 5 }, { "epoch": 0.07164179104477612, "grad_norm": 7.276589393615723, "learning_rate": 6e-05, "loss": 5.8751, "step": 6 }, { "epoch": 0.10746268656716418, "grad_norm": 4.452966213226318, "learning_rate": 9e-05, "loss": 5.6047, "step": 9 }, { "epoch": 0.11940298507462686, "eval_loss": 1.3938730955123901, "eval_runtime": 33.7912, "eval_samples_per_second": 4.173, "eval_steps_per_second": 0.533, "step": 10 }, { "epoch": 0.14328358208955225, "grad_norm": 4.445888042449951, "learning_rate": 9.938441702975689e-05, "loss": 5.5517, "step": 12 }, { "epoch": 0.1791044776119403, "grad_norm": 3.2838003635406494, "learning_rate": 9.619397662556435e-05, "loss": 5.5237, "step": 15 }, { "epoch": 0.1791044776119403, "eval_loss": 1.3736783266067505, "eval_runtime": 33.7833, "eval_samples_per_second": 4.174, "eval_steps_per_second": 0.533, "step": 15 }, { "epoch": 0.21492537313432836, "grad_norm": 3.3037467002868652, "learning_rate": 9.045084971874738e-05, "loss": 5.4188, "step": 18 }, { "epoch": 0.23880597014925373, "eval_loss": 1.3579882383346558, "eval_runtime": 33.8243, "eval_samples_per_second": 4.169, "eval_steps_per_second": 0.532, "step": 20 }, { "epoch": 0.2507462686567164, "grad_norm": 3.0785763263702393, "learning_rate": 8.247240241650918e-05, "loss": 5.4397, "step": 21 }, { "epoch": 0.2865671641791045, "grad_norm": 3.0347342491149902, "learning_rate": 7.269952498697734e-05, "loss": 5.3316, "step": 24 }, { "epoch": 0.29850746268656714, "eval_loss": 1.3502321243286133, "eval_runtime": 33.8052, "eval_samples_per_second": 4.171, "eval_steps_per_second": 0.532, "step": 25 }, { "epoch": 0.32238805970149254, "grad_norm": 2.8705027103424072, "learning_rate": 6.167226819279528e-05, "loss": 5.4831, "step": 27 }, { "epoch": 0.3582089552238806, "grad_norm": 2.9315133094787598, "learning_rate": 5e-05, "loss": 5.4605, "step": 30 }, { "epoch": 0.3582089552238806, "eval_loss": 1.342836856842041, "eval_runtime": 33.8234, "eval_samples_per_second": 4.169, "eval_steps_per_second": 0.532, "step": 30 }, { "epoch": 0.3940298507462687, "grad_norm": 2.7453272342681885, "learning_rate": 3.832773180720475e-05, "loss": 5.4128, "step": 33 }, { "epoch": 0.417910447761194, "eval_loss": 1.3384729623794556, "eval_runtime": 33.805, "eval_samples_per_second": 4.171, "eval_steps_per_second": 0.532, "step": 35 }, { "epoch": 0.4298507462686567, "grad_norm": 2.74045467376709, "learning_rate": 2.7300475013022663e-05, "loss": 5.2587, "step": 36 }, { "epoch": 0.46567164179104475, "grad_norm": 2.70888090133667, "learning_rate": 1.7527597583490822e-05, "loss": 5.2953, "step": 39 }, { "epoch": 0.47761194029850745, "eval_loss": 1.3347140550613403, "eval_runtime": 33.828, "eval_samples_per_second": 4.168, "eval_steps_per_second": 0.532, "step": 40 }, { "epoch": 0.5014925373134328, "grad_norm": 2.642303228378296, "learning_rate": 9.549150281252633e-06, "loss": 5.2156, "step": 42 }, { "epoch": 0.5373134328358209, "grad_norm": 2.631282091140747, "learning_rate": 3.8060233744356633e-06, "loss": 5.3161, "step": 45 }, { "epoch": 0.5373134328358209, "eval_loss": 1.3332561254501343, "eval_runtime": 33.8375, "eval_samples_per_second": 4.167, "eval_steps_per_second": 0.532, "step": 45 }, { "epoch": 0.573134328358209, "grad_norm": 2.71305251121521, "learning_rate": 6.15582970243117e-07, "loss": 5.3342, "step": 48 }, { "epoch": 0.5970149253731343, "eval_loss": 1.3329530954360962, "eval_runtime": 33.8044, "eval_samples_per_second": 4.171, "eval_steps_per_second": 0.532, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.430998008987648e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }