{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 97.19626168224299,
  "eval_steps": 500,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.18,
      "learning_rate": 0.0003269230769230769,
      "loss": 1.6646,
      "step": 85
    },
    {
      "epoch": 6.36,
      "learning_rate": 0.0006538461538461538,
      "loss": 1.517,
      "step": 170
    },
    {
      "epoch": 9.53,
      "learning_rate": 0.0009807692307692308,
      "loss": 1.4586,
      "step": 255
    },
    {
      "epoch": 12.71,
      "learning_rate": 0.0009658119658119658,
      "loss": 1.3597,
      "step": 340
    },
    {
      "epoch": 15.89,
      "learning_rate": 0.0009294871794871796,
      "loss": 1.2625,
      "step": 425
    },
    {
      "epoch": 19.07,
      "learning_rate": 0.0008931623931623932,
      "loss": 1.1835,
      "step": 510
    },
    {
      "epoch": 22.24,
      "learning_rate": 0.0008568376068376068,
      "loss": 1.1184,
      "step": 595
    },
    {
      "epoch": 25.42,
      "learning_rate": 0.0008205128205128205,
      "loss": 1.0683,
      "step": 680
    },
    {
      "epoch": 28.6,
      "learning_rate": 0.0007841880341880342,
      "loss": 1.0321,
      "step": 765
    },
    {
      "epoch": 31.78,
      "learning_rate": 0.0007478632478632479,
      "loss": 1.0009,
      "step": 850
    },
    {
      "epoch": 34.95,
      "learning_rate": 0.0007115384615384616,
      "loss": 0.9663,
      "step": 935
    },
    {
      "epoch": 38.13,
      "learning_rate": 0.0006752136752136753,
      "loss": 0.938,
      "step": 1020
    },
    {
      "epoch": 41.31,
      "learning_rate": 0.0006388888888888888,
      "loss": 0.9178,
      "step": 1105
    },
    {
      "epoch": 44.49,
      "learning_rate": 0.0006025641025641026,
      "loss": 0.8984,
      "step": 1190
    },
    {
      "epoch": 47.66,
      "learning_rate": 0.0005662393162393163,
      "loss": 0.8827,
      "step": 1275
    },
    {
      "epoch": 50.84,
      "learning_rate": 0.0005299145299145299,
      "loss": 0.8693,
      "step": 1360
    },
    {
      "epoch": 54.02,
      "learning_rate": 0.0004935897435897436,
      "loss": 0.8514,
      "step": 1445
    },
    {
      "epoch": 57.2,
      "learning_rate": 0.0004572649572649573,
      "loss": 0.8349,
      "step": 1530
    },
    {
      "epoch": 60.37,
      "learning_rate": 0.00042094017094017095,
      "loss": 0.824,
      "step": 1615
    },
    {
      "epoch": 63.55,
      "learning_rate": 0.00038461538461538467,
      "loss": 0.8138,
      "step": 1700
    },
    {
      "epoch": 66.73,
      "learning_rate": 0.0003482905982905983,
      "loss": 0.8044,
      "step": 1785
    },
    {
      "epoch": 69.91,
      "learning_rate": 0.00031196581196581195,
      "loss": 0.7962,
      "step": 1870
    },
    {
      "epoch": 73.08,
      "learning_rate": 0.0002756410256410257,
      "loss": 0.7854,
      "step": 1955
    },
    {
      "epoch": 76.26,
      "learning_rate": 0.00023931623931623932,
      "loss": 0.7756,
      "step": 2040
    },
    {
      "epoch": 79.44,
      "learning_rate": 0.000202991452991453,
      "loss": 0.7663,
      "step": 2125
    },
    {
      "epoch": 82.62,
      "learning_rate": 0.00016666666666666666,
      "loss": 0.7569,
      "step": 2210
    },
    {
      "epoch": 85.79,
      "learning_rate": 0.00013034188034188036,
      "loss": 0.7491,
      "step": 2295
    },
    {
      "epoch": 88.97,
      "learning_rate": 9.401709401709401e-05,
      "loss": 0.7427,
      "step": 2380
    },
    {
      "epoch": 92.15,
      "learning_rate": 5.76923076923077e-05,
      "loss": 0.7344,
      "step": 2465
    },
    {
      "epoch": 95.33,
      "learning_rate": 2.1367521367521368e-05,
      "loss": 0.7307,
      "step": 2550
    }
  ],
  "logging_steps": 85,
  "max_steps": 2600,
  "num_train_epochs": 100,
  "save_steps": 500,
  "total_flos": 2.7054682298346635e+19,
  "trial_name": null,
  "trial_params": null
}