{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0429553264604811,
  "eval_steps": 9,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001718213058419244,
      "grad_norm": 16.851408004760742,
      "learning_rate": 1e-05,
      "loss": 16.6484,
      "step": 1
    },
    {
      "epoch": 0.001718213058419244,
      "eval_loss": 16.627044677734375,
      "eval_runtime": 9.9282,
      "eval_samples_per_second": 49.354,
      "eval_steps_per_second": 6.245,
      "step": 1
    },
    {
      "epoch": 0.003436426116838488,
      "grad_norm": 16.717243194580078,
      "learning_rate": 2e-05,
      "loss": 16.3156,
      "step": 2
    },
    {
      "epoch": 0.005154639175257732,
      "grad_norm": 16.413101196289062,
      "learning_rate": 3e-05,
      "loss": 16.6699,
      "step": 3
    },
    {
      "epoch": 0.006872852233676976,
      "grad_norm": 16.836313247680664,
      "learning_rate": 4e-05,
      "loss": 16.6116,
      "step": 4
    },
    {
      "epoch": 0.00859106529209622,
      "grad_norm": 16.442672729492188,
      "learning_rate": 5e-05,
      "loss": 16.358,
      "step": 5
    },
    {
      "epoch": 0.010309278350515464,
      "grad_norm": 14.661097526550293,
      "learning_rate": 6e-05,
      "loss": 15.7403,
      "step": 6
    },
    {
      "epoch": 0.012027491408934709,
      "grad_norm": 15.021746635437012,
      "learning_rate": 7e-05,
      "loss": 16.1866,
      "step": 7
    },
    {
      "epoch": 0.013745704467353952,
      "grad_norm": 13.62826919555664,
      "learning_rate": 8e-05,
      "loss": 15.7283,
      "step": 8
    },
    {
      "epoch": 0.015463917525773196,
      "grad_norm": 12.390254974365234,
      "learning_rate": 9e-05,
      "loss": 15.0803,
      "step": 9
    },
    {
      "epoch": 0.015463917525773196,
      "eval_loss": 14.207258224487305,
      "eval_runtime": 8.7119,
      "eval_samples_per_second": 56.245,
      "eval_steps_per_second": 7.117,
      "step": 9
    },
    {
      "epoch": 0.01718213058419244,
      "grad_norm": 11.050700187683105,
      "learning_rate": 0.0001,
      "loss": 14.3405,
      "step": 10
    },
    {
      "epoch": 0.018900343642611683,
      "grad_norm": 10.292141914367676,
      "learning_rate": 9.99695413509548e-05,
      "loss": 13.1701,
      "step": 11
    },
    {
      "epoch": 0.020618556701030927,
      "grad_norm": 12.430923461914062,
      "learning_rate": 9.987820251299122e-05,
      "loss": 13.263,
      "step": 12
    },
    {
      "epoch": 0.022336769759450172,
      "grad_norm": 11.984938621520996,
      "learning_rate": 9.972609476841367e-05,
      "loss": 12.0077,
      "step": 13
    },
    {
      "epoch": 0.024054982817869417,
      "grad_norm": 13.232406616210938,
      "learning_rate": 9.951340343707852e-05,
      "loss": 12.0673,
      "step": 14
    },
    {
      "epoch": 0.02577319587628866,
      "grad_norm": 12.7660493850708,
      "learning_rate": 9.924038765061042e-05,
      "loss": 10.8489,
      "step": 15
    },
    {
      "epoch": 0.027491408934707903,
      "grad_norm": 13.3019437789917,
      "learning_rate": 9.890738003669029e-05,
      "loss": 10.213,
      "step": 16
    },
    {
      "epoch": 0.029209621993127148,
      "grad_norm": 13.64773178100586,
      "learning_rate": 9.851478631379982e-05,
      "loss": 9.3943,
      "step": 17
    },
    {
      "epoch": 0.030927835051546393,
      "grad_norm": 14.809378623962402,
      "learning_rate": 9.806308479691595e-05,
      "loss": 8.7002,
      "step": 18
    },
    {
      "epoch": 0.030927835051546393,
      "eval_loss": 8.073286056518555,
      "eval_runtime": 8.6987,
      "eval_samples_per_second": 56.33,
      "eval_steps_per_second": 7.128,
      "step": 18
    },
    {
      "epoch": 0.03264604810996564,
      "grad_norm": 15.218389511108398,
      "learning_rate": 9.755282581475769e-05,
      "loss": 7.7854,
      "step": 19
    },
    {
      "epoch": 0.03436426116838488,
      "grad_norm": 13.829675674438477,
      "learning_rate": 9.698463103929542e-05,
      "loss": 7.2369,
      "step": 20
    },
    {
      "epoch": 0.03608247422680412,
      "grad_norm": 12.88297176361084,
      "learning_rate": 9.635919272833938e-05,
      "loss": 5.9757,
      "step": 21
    },
    {
      "epoch": 0.037800687285223365,
      "grad_norm": 10.21223258972168,
      "learning_rate": 9.567727288213005e-05,
      "loss": 5.9149,
      "step": 22
    },
    {
      "epoch": 0.03951890034364261,
      "grad_norm": 9.509270668029785,
      "learning_rate": 9.493970231495835e-05,
      "loss": 5.1372,
      "step": 23
    },
    {
      "epoch": 0.041237113402061855,
      "grad_norm": 9.279484748840332,
      "learning_rate": 9.414737964294636e-05,
      "loss": 4.7909,
      "step": 24
    },
    {
      "epoch": 0.0429553264604811,
      "grad_norm": 8.418981552124023,
      "learning_rate": 9.330127018922194e-05,
      "loss": 4.4278,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 659478533898240.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}