{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 426, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.176056338028169, "grad_norm": 1.0253130197525024, "learning_rate": 0.00019958367684748586, "loss": 1.8153, "step": 25 }, { "epoch": 0.352112676056338, "grad_norm": 0.5390976071357727, "learning_rate": 0.00019606536598722435, "loss": 1.384, "step": 50 }, { "epoch": 0.528169014084507, "grad_norm": 0.6293356418609619, "learning_rate": 0.0001890833789866129, "loss": 1.222, "step": 75 }, { "epoch": 0.704225352112676, "grad_norm": 0.7000795602798462, "learning_rate": 0.00017888945424832895, "loss": 1.1705, "step": 100 }, { "epoch": 0.8802816901408451, "grad_norm": 0.5979147553443909, "learning_rate": 0.00016585113790650388, "loss": 1.1829, "step": 125 }, { "epoch": 1.056338028169014, "grad_norm": 0.6060128211975098, "learning_rate": 0.00015043853180022836, "loss": 1.0767, "step": 150 }, { "epoch": 1.232394366197183, "grad_norm": 0.7619791030883789, "learning_rate": 0.00013320734375908607, "loss": 1.0177, "step": 175 }, { "epoch": 1.408450704225352, "grad_norm": 0.8311352133750916, "learning_rate": 0.00011477885132961679, "loss": 1.0361, "step": 200 }, { "epoch": 1.584507042253521, "grad_norm": 0.7331663370132446, "learning_rate": 9.581750135876277e-05, "loss": 0.9322, "step": 225 }, { "epoch": 1.76056338028169, "grad_norm": 0.5762674808502197, "learning_rate": 7.700695309049767e-05, "loss": 0.792, "step": 250 }, { "epoch": 1.936619718309859, "grad_norm": 0.7466037273406982, "learning_rate": 5.902542855160642e-05, "loss": 0.837, "step": 275 }, { "epoch": 2.112676056338028, "grad_norm": 0.7813904285430908, "learning_rate": 4.252125897855932e-05, "loss": 0.8631, "step": 300 }, { "epoch": 2.288732394366197, "grad_norm": 0.6281647086143494, "learning_rate": 2.8089508969081e-05, "loss": 0.7121, "step": 325 }, { "epoch": 2.464788732394366, "grad_norm": 1.0807723999023438, "learning_rate": 1.625052118420889e-05, "loss": 0.7519, "step": 350 }, { "epoch": 2.640845070422535, "grad_norm": 0.8650486469268799, "learning_rate": 7.431155180401705e-06, "loss": 0.8256, "step": 375 }, { "epoch": 2.816901408450704, "grad_norm": 0.860803484916687, "learning_rate": 1.9493968132951458e-06, "loss": 0.7998, "step": 400 }, { "epoch": 2.992957746478873, "grad_norm": 0.8234962224960327, "learning_rate": 2.893127359282488e-09, "loss": 0.683, "step": 425 } ], "logging_steps": 25, "max_steps": 426, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7981829121835008.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }