File size: 2,075 Bytes
85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d dca49ad 85f308d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 21,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"grad_norm": 10.547944577355347,
"learning_rate": 6.666666666666667e-06,
"loss": 1.7384,
"step": 1
},
{
"epoch": 0.71,
"grad_norm": 13.625247977537818,
"learning_rate": 1.9396926207859085e-05,
"loss": 1.7589,
"step": 5
},
{
"epoch": 1.0,
"eval_loss": 1.7180463075637817,
"eval_runtime": 1.7598,
"eval_samples_per_second": 7.387,
"eval_steps_per_second": 0.568,
"step": 7
},
{
"epoch": 1.43,
"grad_norm": 5.744275140398176,
"learning_rate": 1.342020143325669e-05,
"loss": 1.4041,
"step": 10
},
{
"epoch": 2.0,
"eval_loss": 1.763495683670044,
"eval_runtime": 1.6856,
"eval_samples_per_second": 7.712,
"eval_steps_per_second": 0.593,
"step": 14
},
{
"epoch": 2.14,
"grad_norm": 7.096816156714056,
"learning_rate": 5.000000000000003e-06,
"loss": 1.0218,
"step": 15
},
{
"epoch": 2.86,
"grad_norm": 4.77063469960506,
"learning_rate": 1.519224698779198e-07,
"loss": 0.6946,
"step": 20
},
{
"epoch": 3.0,
"eval_loss": 1.8394839763641357,
"eval_runtime": 1.7001,
"eval_samples_per_second": 7.647,
"eval_steps_per_second": 0.588,
"step": 21
},
{
"epoch": 3.0,
"step": 21,
"total_flos": 4344627855360.0,
"train_loss": 1.1876622920944577,
"train_runtime": 427.6941,
"train_samples_per_second": 3.065,
"train_steps_per_second": 0.049
}
],
"logging_steps": 5,
"max_steps": 21,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 4344627855360.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|