{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.05458515283842795,
  "eval_steps": 9,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002183406113537118,
      "grad_norm": 0.022989921271800995,
      "learning_rate": 1e-05,
      "loss": 10.379,
      "step": 1
    },
    {
      "epoch": 0.002183406113537118,
      "eval_loss": 10.378771781921387,
      "eval_runtime": 4.0177,
      "eval_samples_per_second": 96.074,
      "eval_steps_per_second": 12.196,
      "step": 1
    },
    {
      "epoch": 0.004366812227074236,
      "grad_norm": 0.02065795473754406,
      "learning_rate": 2e-05,
      "loss": 10.3789,
      "step": 2
    },
    {
      "epoch": 0.006550218340611353,
      "grad_norm": 0.024160347878932953,
      "learning_rate": 3e-05,
      "loss": 10.3789,
      "step": 3
    },
    {
      "epoch": 0.008733624454148471,
      "grad_norm": 0.02151237055659294,
      "learning_rate": 4e-05,
      "loss": 10.3807,
      "step": 4
    },
    {
      "epoch": 0.010917030567685589,
      "grad_norm": 0.022390680387616158,
      "learning_rate": 5e-05,
      "loss": 10.3787,
      "step": 5
    },
    {
      "epoch": 0.013100436681222707,
      "grad_norm": 0.027181802317500114,
      "learning_rate": 6e-05,
      "loss": 10.3783,
      "step": 6
    },
    {
      "epoch": 0.015283842794759825,
      "grad_norm": 0.023402391001582146,
      "learning_rate": 7e-05,
      "loss": 10.3793,
      "step": 7
    },
    {
      "epoch": 0.017467248908296942,
      "grad_norm": 0.023139122873544693,
      "learning_rate": 8e-05,
      "loss": 10.3785,
      "step": 8
    },
    {
      "epoch": 0.019650655021834062,
      "grad_norm": 0.024676023051142693,
      "learning_rate": 9e-05,
      "loss": 10.3773,
      "step": 9
    },
    {
      "epoch": 0.019650655021834062,
      "eval_loss": 10.378523826599121,
      "eval_runtime": 3.9783,
      "eval_samples_per_second": 97.026,
      "eval_steps_per_second": 12.317,
      "step": 9
    },
    {
      "epoch": 0.021834061135371178,
      "grad_norm": 0.02337835170328617,
      "learning_rate": 0.0001,
      "loss": 10.3801,
      "step": 10
    },
    {
      "epoch": 0.024017467248908297,
      "grad_norm": 0.02416376955807209,
      "learning_rate": 9.99695413509548e-05,
      "loss": 10.379,
      "step": 11
    },
    {
      "epoch": 0.026200873362445413,
      "grad_norm": 0.024184616282582283,
      "learning_rate": 9.987820251299122e-05,
      "loss": 10.3786,
      "step": 12
    },
    {
      "epoch": 0.028384279475982533,
      "grad_norm": 0.023965170606970787,
      "learning_rate": 9.972609476841367e-05,
      "loss": 10.3802,
      "step": 13
    },
    {
      "epoch": 0.03056768558951965,
      "grad_norm": 0.02749583125114441,
      "learning_rate": 9.951340343707852e-05,
      "loss": 10.3808,
      "step": 14
    },
    {
      "epoch": 0.03275109170305677,
      "grad_norm": 0.02478426694869995,
      "learning_rate": 9.924038765061042e-05,
      "loss": 10.3783,
      "step": 15
    },
    {
      "epoch": 0.034934497816593885,
      "grad_norm": 0.02539663203060627,
      "learning_rate": 9.890738003669029e-05,
      "loss": 10.3789,
      "step": 16
    },
    {
      "epoch": 0.03711790393013101,
      "grad_norm": 0.0303711649030447,
      "learning_rate": 9.851478631379982e-05,
      "loss": 10.3786,
      "step": 17
    },
    {
      "epoch": 0.039301310043668124,
      "grad_norm": 0.028177475556731224,
      "learning_rate": 9.806308479691595e-05,
      "loss": 10.3755,
      "step": 18
    },
    {
      "epoch": 0.039301310043668124,
      "eval_loss": 10.377870559692383,
      "eval_runtime": 4.0384,
      "eval_samples_per_second": 95.583,
      "eval_steps_per_second": 12.134,
      "step": 18
    },
    {
      "epoch": 0.04148471615720524,
      "grad_norm": 0.027257202193140984,
      "learning_rate": 9.755282581475769e-05,
      "loss": 10.3797,
      "step": 19
    },
    {
      "epoch": 0.043668122270742356,
      "grad_norm": 0.026211563497781754,
      "learning_rate": 9.698463103929542e-05,
      "loss": 10.3795,
      "step": 20
    },
    {
      "epoch": 0.04585152838427948,
      "grad_norm": 0.025603869929909706,
      "learning_rate": 9.635919272833938e-05,
      "loss": 10.3768,
      "step": 21
    },
    {
      "epoch": 0.048034934497816595,
      "grad_norm": 0.024822156876325607,
      "learning_rate": 9.567727288213005e-05,
      "loss": 10.3782,
      "step": 22
    },
    {
      "epoch": 0.05021834061135371,
      "grad_norm": 0.03178217634558678,
      "learning_rate": 9.493970231495835e-05,
      "loss": 10.3775,
      "step": 23
    },
    {
      "epoch": 0.05240174672489083,
      "grad_norm": 0.02607903815805912,
      "learning_rate": 9.414737964294636e-05,
      "loss": 10.3809,
      "step": 24
    },
    {
      "epoch": 0.05458515283842795,
      "grad_norm": 0.0315554179251194,
      "learning_rate": 9.330127018922194e-05,
      "loss": 10.3776,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2615122329600.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}