File size: 2,841 Bytes

52850c9
 
 
 
 
 
 
 
 
 
 
 
f34cdcb
52850c9
f34cdcb
52850c9
 
 
 
f34cdcb
52850c9
f34cdcb
52850c9
 
 
 
f34cdcb
52850c9
f34cdcb
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
f34cdcb
 
 
52850c9
 
 
 
 
f34cdcb
 
 
 
 
52850c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f34cdcb
52850c9

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.84,
  "eval_steps": 500,
  "global_step": 24,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 18.83793067932129,
      "learning_rate": 0.00018333333333333334,
      "loss": 9.2102,
      "step": 2
    },
    {
      "epoch": 0.64,
      "grad_norm": 22.66363525390625,
      "learning_rate": 0.0001666666666666667,
      "loss": 8.2322,
      "step": 4
    },
    {
      "epoch": 0.96,
      "grad_norm": 22.428932189941406,
      "learning_rate": 0.00015000000000000001,
      "loss": 7.2138,
      "step": 6
    },
    {
      "epoch": 1.28,
      "grad_norm": 17.410696029663086,
      "learning_rate": 0.00014166666666666668,
      "loss": 6.6662,
      "step": 8
    },
    {
      "epoch": 1.6,
      "grad_norm": 12.254047393798828,
      "learning_rate": 0.000125,
      "loss": 6.6019,
      "step": 10
    },
    {
      "epoch": 1.92,
      "grad_norm": 10.925116539001465,
      "learning_rate": 0.00010833333333333333,
      "loss": 6.2901,
      "step": 12
    },
    {
      "epoch": 2.24,
      "grad_norm": 11.835088729858398,
      "learning_rate": 9.166666666666667e-05,
      "loss": 6.1701,
      "step": 14
    },
    {
      "epoch": 2.56,
      "grad_norm": 6.8969950675964355,
      "learning_rate": 7.500000000000001e-05,
      "loss": 6.0758,
      "step": 16
    },
    {
      "epoch": 2.88,
      "grad_norm": 10.971280097961426,
      "learning_rate": 5.833333333333334e-05,
      "loss": 5.953,
      "step": 18
    },
    {
      "epoch": 3.2,
      "grad_norm": 6.5526275634765625,
      "learning_rate": 4.166666666666667e-05,
      "loss": 6.1228,
      "step": 20
    },
    {
      "epoch": 3.52,
      "grad_norm": 4.4527459144592285,
      "learning_rate": 2.5e-05,
      "loss": 6.0307,
      "step": 22
    },
    {
      "epoch": 3.84,
      "grad_norm": 8.93628978729248,
      "learning_rate": 8.333333333333334e-06,
      "loss": 5.9651,
      "step": 24
    },
    {
      "epoch": 3.84,
      "step": 24,
      "total_flos": 110537516692488.0,
      "train_loss": 6.710986336072286,
      "train_runtime": 634.4855,
      "train_samples_per_second": 0.63,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 2,
  "max_steps": 24,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 110537516692488.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}