{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9936, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.192, "grad_norm": 0.3010159432888031, "learning_rate": 3.191489361702128e-05, "loss": 2.2514, "step": 15 }, { "epoch": 0.384, "grad_norm": 0.2521279752254486, "learning_rate": 6.382978723404256e-05, "loss": 2.1582, "step": 30 }, { "epoch": 0.576, "grad_norm": 0.2557184100151062, "learning_rate": 9.574468085106384e-05, "loss": 1.8683, "step": 45 }, { "epoch": 0.768, "grad_norm": 0.22737619280815125, "learning_rate": 0.00012765957446808513, "loss": 1.6607, "step": 60 }, { "epoch": 0.96, "grad_norm": 0.19700799882411957, "learning_rate": 0.00015957446808510637, "loss": 1.4682, "step": 75 }, { "epoch": 1.152, "grad_norm": 0.2368774265050888, "learning_rate": 0.00019148936170212768, "loss": 1.3196, "step": 90 }, { "epoch": 1.3439999999999999, "grad_norm": 0.3574005365371704, "learning_rate": 0.00018990825688073394, "loss": 1.1283, "step": 105 }, { "epoch": 1.536, "grad_norm": 0.3291820287704468, "learning_rate": 0.0001761467889908257, "loss": 0.9629, "step": 120 }, { "epoch": 1.728, "grad_norm": 0.4235256612300873, "learning_rate": 0.00016238532110091745, "loss": 0.8761, "step": 135 }, { "epoch": 1.92, "grad_norm": 0.3522832691669464, "learning_rate": 0.00014862385321100919, "loss": 0.7764, "step": 150 }, { "epoch": 2.112, "grad_norm": 0.3914245367050171, "learning_rate": 0.00013486238532110092, "loss": 0.6717, "step": 165 }, { "epoch": 2.304, "grad_norm": 0.3502177894115448, "learning_rate": 0.00012110091743119268, "loss": 0.6329, "step": 180 }, { "epoch": 2.496, "grad_norm": 0.5064918398857117, "learning_rate": 0.0001073394495412844, "loss": 0.5551, "step": 195 }, { "epoch": 2.6879999999999997, "grad_norm": 0.4178985357284546, "learning_rate": 9.357798165137616e-05, "loss": 0.5132, "step": 210 }, { "epoch": 2.88, "grad_norm": 0.3729366958141327, "learning_rate": 7.98165137614679e-05, "loss": 0.4831, "step": 225 }, { "epoch": 3.072, "grad_norm": 0.30623266100883484, "learning_rate": 6.605504587155963e-05, "loss": 0.4484, "step": 240 }, { "epoch": 3.2640000000000002, "grad_norm": 0.31436726450920105, "learning_rate": 5.229357798165138e-05, "loss": 0.4255, "step": 255 }, { "epoch": 3.456, "grad_norm": 0.32084089517593384, "learning_rate": 3.8532110091743125e-05, "loss": 0.3731, "step": 270 }, { "epoch": 3.648, "grad_norm": 0.3452220857143402, "learning_rate": 2.4770642201834864e-05, "loss": 0.3786, "step": 285 }, { "epoch": 3.84, "grad_norm": 0.3483213782310486, "learning_rate": 1.1009174311926607e-05, "loss": 0.3988, "step": 300 } ], "logging_steps": 15, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.447199037200794e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }