cnn_10k_llama2 / checkpoint-2541 /trainer_state.json
Username6568's picture
Upload folder using huggingface_hub
026542d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 94.99065420560747,
"eval_steps": 500,
"global_step": 2541,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.18,
"learning_rate": 0.0003269230769230769,
"loss": 1.6646,
"step": 85
},
{
"epoch": 6.36,
"learning_rate": 0.0006538461538461538,
"loss": 1.517,
"step": 170
},
{
"epoch": 9.53,
"learning_rate": 0.0009807692307692308,
"loss": 1.4586,
"step": 255
},
{
"epoch": 12.71,
"learning_rate": 0.0009658119658119658,
"loss": 1.3597,
"step": 340
},
{
"epoch": 15.89,
"learning_rate": 0.0009294871794871796,
"loss": 1.2625,
"step": 425
},
{
"epoch": 19.07,
"learning_rate": 0.0008931623931623932,
"loss": 1.1835,
"step": 510
},
{
"epoch": 22.24,
"learning_rate": 0.0008568376068376068,
"loss": 1.1184,
"step": 595
},
{
"epoch": 25.42,
"learning_rate": 0.0008205128205128205,
"loss": 1.0683,
"step": 680
},
{
"epoch": 28.6,
"learning_rate": 0.0007841880341880342,
"loss": 1.0321,
"step": 765
},
{
"epoch": 31.78,
"learning_rate": 0.0007478632478632479,
"loss": 1.0009,
"step": 850
},
{
"epoch": 34.95,
"learning_rate": 0.0007115384615384616,
"loss": 0.9663,
"step": 935
},
{
"epoch": 38.13,
"learning_rate": 0.0006752136752136753,
"loss": 0.938,
"step": 1020
},
{
"epoch": 41.31,
"learning_rate": 0.0006388888888888888,
"loss": 0.9178,
"step": 1105
},
{
"epoch": 44.49,
"learning_rate": 0.0006025641025641026,
"loss": 0.8984,
"step": 1190
},
{
"epoch": 47.66,
"learning_rate": 0.0005662393162393163,
"loss": 0.8827,
"step": 1275
},
{
"epoch": 50.84,
"learning_rate": 0.0005299145299145299,
"loss": 0.8693,
"step": 1360
},
{
"epoch": 54.02,
"learning_rate": 0.0004935897435897436,
"loss": 0.8514,
"step": 1445
},
{
"epoch": 57.2,
"learning_rate": 0.0004572649572649573,
"loss": 0.8349,
"step": 1530
},
{
"epoch": 60.37,
"learning_rate": 0.00042094017094017095,
"loss": 0.824,
"step": 1615
},
{
"epoch": 63.55,
"learning_rate": 0.00038461538461538467,
"loss": 0.8138,
"step": 1700
},
{
"epoch": 66.73,
"learning_rate": 0.0003482905982905983,
"loss": 0.8044,
"step": 1785
},
{
"epoch": 69.91,
"learning_rate": 0.00031196581196581195,
"loss": 0.7962,
"step": 1870
},
{
"epoch": 73.08,
"learning_rate": 0.0002756410256410257,
"loss": 0.7854,
"step": 1955
},
{
"epoch": 76.26,
"learning_rate": 0.00023931623931623932,
"loss": 0.7756,
"step": 2040
},
{
"epoch": 79.44,
"learning_rate": 0.000202991452991453,
"loss": 0.7663,
"step": 2125
},
{
"epoch": 82.62,
"learning_rate": 0.00016666666666666666,
"loss": 0.7569,
"step": 2210
},
{
"epoch": 85.79,
"learning_rate": 0.00013034188034188036,
"loss": 0.7491,
"step": 2295
},
{
"epoch": 88.97,
"learning_rate": 9.401709401709401e-05,
"loss": 0.7427,
"step": 2380
},
{
"epoch": 92.15,
"learning_rate": 5.76923076923077e-05,
"loss": 0.7344,
"step": 2465
}
],
"logging_steps": 85,
"max_steps": 2600,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 2.644335052617836e+19,
"trial_name": null,
"trial_params": null
}