cwaud's picture
Training in progress, step 10, checkpoint
b1c7246 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.015255530129672006,
"eval_steps": 3,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015255530129672007,
"grad_norm": 3.5874860286712646,
"learning_rate": 2e-05,
"loss": 1.924,
"step": 1
},
{
"epoch": 0.0015255530129672007,
"eval_loss": 3.1886789798736572,
"eval_runtime": 48.1701,
"eval_samples_per_second": 5.75,
"eval_steps_per_second": 2.886,
"step": 1
},
{
"epoch": 0.0030511060259344014,
"grad_norm": 4.946148872375488,
"learning_rate": 4e-05,
"loss": 3.4698,
"step": 2
},
{
"epoch": 0.004576659038901602,
"grad_norm": 7.080003261566162,
"learning_rate": 6e-05,
"loss": 4.0679,
"step": 3
},
{
"epoch": 0.004576659038901602,
"eval_loss": 3.142275810241699,
"eval_runtime": 50.3607,
"eval_samples_per_second": 5.5,
"eval_steps_per_second": 2.76,
"step": 3
},
{
"epoch": 0.006102212051868803,
"grad_norm": 4.932255744934082,
"learning_rate": 8e-05,
"loss": 3.0787,
"step": 4
},
{
"epoch": 0.007627765064836003,
"grad_norm": 6.312702655792236,
"learning_rate": 0.0001,
"loss": 3.8769,
"step": 5
},
{
"epoch": 0.009153318077803204,
"grad_norm": 7.200475215911865,
"learning_rate": 0.00012,
"loss": 3.5321,
"step": 6
},
{
"epoch": 0.009153318077803204,
"eval_loss": 2.6806535720825195,
"eval_runtime": 51.326,
"eval_samples_per_second": 5.397,
"eval_steps_per_second": 2.708,
"step": 6
},
{
"epoch": 0.010678871090770405,
"grad_norm": 3.708185911178589,
"learning_rate": 0.00014,
"loss": 1.4473,
"step": 7
},
{
"epoch": 0.012204424103737605,
"grad_norm": 4.481770038604736,
"learning_rate": 0.00016,
"loss": 1.8168,
"step": 8
},
{
"epoch": 0.013729977116704805,
"grad_norm": 8.652414321899414,
"learning_rate": 0.00018,
"loss": 1.4997,
"step": 9
},
{
"epoch": 0.013729977116704805,
"eval_loss": 2.371807336807251,
"eval_runtime": 50.9338,
"eval_samples_per_second": 5.438,
"eval_steps_per_second": 2.729,
"step": 9
},
{
"epoch": 0.015255530129672006,
"grad_norm": 15.314831733703613,
"learning_rate": 0.0002,
"loss": 1.8957,
"step": 10
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2245532414115840.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}