File size: 2,237 Bytes
85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d b526ec0 85f308d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.769230769230769,
"eval_steps": 500,
"global_step": 27,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"grad_norm": 24.191700961175112,
"learning_rate": 6.666666666666667e-06,
"loss": 0.5581,
"step": 1
},
{
"epoch": 0.51,
"grad_norm": 12.576350745768869,
"learning_rate": 1.9659258262890683e-05,
"loss": 0.8334,
"step": 5
},
{
"epoch": 0.92,
"eval_loss": 1.4131343364715576,
"eval_runtime": 38.4185,
"eval_samples_per_second": 8.954,
"eval_steps_per_second": 0.573,
"step": 9
},
{
"epoch": 1.03,
"grad_norm": 17.837211332799143,
"learning_rate": 1.608761429008721e-05,
"loss": 0.738,
"step": 10
},
{
"epoch": 1.54,
"grad_norm": 3.797877129136344,
"learning_rate": 1e-05,
"loss": 0.6303,
"step": 15
},
{
"epoch": 1.95,
"eval_loss": 1.4060215950012207,
"eval_runtime": 39.241,
"eval_samples_per_second": 8.766,
"eval_steps_per_second": 0.561,
"step": 19
},
{
"epoch": 2.05,
"grad_norm": 7.485764754081964,
"learning_rate": 3.912385709912794e-06,
"loss": 0.5348,
"step": 20
},
{
"epoch": 2.56,
"grad_norm": 3.0173543034517323,
"learning_rate": 3.4074173710931804e-07,
"loss": 0.4647,
"step": 25
},
{
"epoch": 2.77,
"eval_loss": 1.4294962882995605,
"eval_runtime": 38.6474,
"eval_samples_per_second": 8.901,
"eval_steps_per_second": 0.569,
"step": 27
},
{
"epoch": 2.77,
"step": 27,
"total_flos": 5600905789440.0,
"train_loss": 0.6124309632513258,
"train_runtime": 676.0028,
"train_samples_per_second": 2.72,
"train_steps_per_second": 0.04
}
],
"logging_steps": 5,
"max_steps": 27,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 5600905789440.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|