{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.96370235934664,
  "global_step": 22000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.45,
      "learning_rate": 1.9546279491833034e-05,
      "loss": 0.834,
      "step": 500
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.9092558983666063e-05,
      "loss": 0.8279,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.8352304100990295,
      "eval_runtime": 300.0386,
      "eval_samples_per_second": 338.6,
      "eval_steps_per_second": 5.293,
      "step": 1102
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.8638838475499095e-05,
      "loss": 0.8249,
      "step": 1500
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.8185117967332124e-05,
      "loss": 0.8218,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.8333624005317688,
      "eval_runtime": 299.0242,
      "eval_samples_per_second": 339.748,
      "eval_steps_per_second": 5.311,
      "step": 2204
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.7731397459165156e-05,
      "loss": 0.8171,
      "step": 2500
    },
    {
      "epoch": 2.72,
      "learning_rate": 1.7277676950998185e-05,
      "loss": 0.8138,
      "step": 3000
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.8279737234115601,
      "eval_runtime": 299.8977,
      "eval_samples_per_second": 338.759,
      "eval_steps_per_second": 5.295,
      "step": 3306
    },
    {
      "epoch": 3.18,
      "learning_rate": 1.6823956442831217e-05,
      "loss": 0.8094,
      "step": 3500
    },
    {
      "epoch": 3.63,
      "learning_rate": 1.637023593466425e-05,
      "loss": 0.8084,
      "step": 4000
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.8279556035995483,
      "eval_runtime": 305.7805,
      "eval_samples_per_second": 332.242,
      "eval_steps_per_second": 5.193,
      "step": 4408
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.5916515426497278e-05,
      "loss": 0.8028,
      "step": 4500
    },
    {
      "epoch": 4.54,
      "learning_rate": 1.546279491833031e-05,
      "loss": 0.7997,
      "step": 5000
    },
    {
      "epoch": 4.99,
      "learning_rate": 1.500907441016334e-05,
      "loss": 0.7975,
      "step": 5500
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.8251700401306152,
      "eval_runtime": 299.8544,
      "eval_samples_per_second": 338.808,
      "eval_steps_per_second": 5.296,
      "step": 5510
    },
    {
      "epoch": 5.44,
      "learning_rate": 1.4555353901996372e-05,
      "loss": 0.798,
      "step": 6000
    },
    {
      "epoch": 5.9,
      "learning_rate": 1.4101633393829401e-05,
      "loss": 0.7965,
      "step": 6500
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.8234975337982178,
      "eval_runtime": 299.5265,
      "eval_samples_per_second": 339.179,
      "eval_steps_per_second": 5.302,
      "step": 6612
    },
    {
      "epoch": 6.35,
      "learning_rate": 1.3647912885662433e-05,
      "loss": 0.7913,
      "step": 7000
    },
    {
      "epoch": 6.81,
      "learning_rate": 1.3194192377495462e-05,
      "loss": 0.7858,
      "step": 7500
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.8199203014373779,
      "eval_runtime": 298.6313,
      "eval_samples_per_second": 340.195,
      "eval_steps_per_second": 5.318,
      "step": 7714
    },
    {
      "epoch": 7.26,
      "learning_rate": 1.2740471869328494e-05,
      "loss": 0.7845,
      "step": 8000
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.2286751361161527e-05,
      "loss": 0.7808,
      "step": 8500
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.8201999664306641,
      "eval_runtime": 305.3068,
      "eval_samples_per_second": 332.757,
      "eval_steps_per_second": 5.201,
      "step": 8816
    },
    {
      "epoch": 8.17,
      "learning_rate": 1.1833030852994555e-05,
      "loss": 0.7824,
      "step": 9000
    },
    {
      "epoch": 8.62,
      "learning_rate": 1.1379310344827587e-05,
      "loss": 0.7805,
      "step": 9500
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.8179581165313721,
      "eval_runtime": 299.8221,
      "eval_samples_per_second": 338.844,
      "eval_steps_per_second": 5.296,
      "step": 9918
    },
    {
      "epoch": 9.07,
      "learning_rate": 1.0925589836660618e-05,
      "loss": 0.7762,
      "step": 10000
    },
    {
      "epoch": 9.53,
      "learning_rate": 1.0471869328493648e-05,
      "loss": 0.7744,
      "step": 10500
    },
    {
      "epoch": 9.98,
      "learning_rate": 1.0018148820326679e-05,
      "loss": 0.7698,
      "step": 11000
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.813572347164154,
      "eval_runtime": 299.3306,
      "eval_samples_per_second": 339.401,
      "eval_steps_per_second": 5.305,
      "step": 11020
    },
    {
      "epoch": 10.44,
      "learning_rate": 9.564428312159711e-06,
      "loss": 0.768,
      "step": 11500
    },
    {
      "epoch": 10.89,
      "learning_rate": 9.110707803992742e-06,
      "loss": 0.7704,
      "step": 12000
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.8144657015800476,
      "eval_runtime": 299.3629,
      "eval_samples_per_second": 339.364,
      "eval_steps_per_second": 5.305,
      "step": 12122
    },
    {
      "epoch": 11.34,
      "learning_rate": 8.656987295825772e-06,
      "loss": 0.7649,
      "step": 12500
    },
    {
      "epoch": 11.8,
      "learning_rate": 8.203266787658803e-06,
      "loss": 0.7666,
      "step": 13000
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.8124042749404907,
      "eval_runtime": 306.0664,
      "eval_samples_per_second": 331.931,
      "eval_steps_per_second": 5.188,
      "step": 13224
    },
    {
      "epoch": 12.25,
      "learning_rate": 7.749546279491835e-06,
      "loss": 0.7638,
      "step": 13500
    },
    {
      "epoch": 12.7,
      "learning_rate": 7.295825771324865e-06,
      "loss": 0.7616,
      "step": 14000
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.8092049360275269,
      "eval_runtime": 300.1177,
      "eval_samples_per_second": 338.511,
      "eval_steps_per_second": 5.291,
      "step": 14326
    },
    {
      "epoch": 13.16,
      "learning_rate": 6.842105263157896e-06,
      "loss": 0.7597,
      "step": 14500
    },
    {
      "epoch": 13.61,
      "learning_rate": 6.388384754990926e-06,
      "loss": 0.7558,
      "step": 15000
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.8094133138656616,
      "eval_runtime": 299.6821,
      "eval_samples_per_second": 339.003,
      "eval_steps_per_second": 5.299,
      "step": 15428
    },
    {
      "epoch": 14.07,
      "learning_rate": 5.934664246823957e-06,
      "loss": 0.7575,
      "step": 15500
    },
    {
      "epoch": 14.52,
      "learning_rate": 5.480943738656987e-06,
      "loss": 0.7562,
      "step": 16000
    },
    {
      "epoch": 14.97,
      "learning_rate": 5.027223230490018e-06,
      "loss": 0.7564,
      "step": 16500
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.8088417053222656,
      "eval_runtime": 300.0855,
      "eval_samples_per_second": 338.547,
      "eval_steps_per_second": 5.292,
      "step": 16530
    },
    {
      "epoch": 15.43,
      "learning_rate": 4.57350272232305e-06,
      "loss": 0.7526,
      "step": 17000
    },
    {
      "epoch": 15.88,
      "learning_rate": 4.11978221415608e-06,
      "loss": 0.753,
      "step": 17500
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.8049781918525696,
      "eval_runtime": 304.2347,
      "eval_samples_per_second": 333.93,
      "eval_steps_per_second": 5.22,
      "step": 17632
    },
    {
      "epoch": 16.33,
      "learning_rate": 3.666061705989111e-06,
      "loss": 0.7524,
      "step": 18000
    },
    {
      "epoch": 16.79,
      "learning_rate": 3.212341197822142e-06,
      "loss": 0.7502,
      "step": 18500
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.8041799068450928,
      "eval_runtime": 301.0417,
      "eval_samples_per_second": 337.472,
      "eval_steps_per_second": 5.275,
      "step": 18734
    },
    {
      "epoch": 17.24,
      "learning_rate": 2.7586206896551725e-06,
      "loss": 0.7487,
      "step": 19000
    },
    {
      "epoch": 17.7,
      "learning_rate": 2.3049001814882034e-06,
      "loss": 0.7495,
      "step": 19500
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.8050508499145508,
      "eval_runtime": 298.8367,
      "eval_samples_per_second": 339.962,
      "eval_steps_per_second": 5.314,
      "step": 19836
    },
    {
      "epoch": 18.15,
      "learning_rate": 1.8511796733212343e-06,
      "loss": 0.7478,
      "step": 20000
    },
    {
      "epoch": 18.6,
      "learning_rate": 1.3974591651542652e-06,
      "loss": 0.7479,
      "step": 20500
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.8038392066955566,
      "eval_runtime": 299.5234,
      "eval_samples_per_second": 339.182,
      "eval_steps_per_second": 5.302,
      "step": 20938
    },
    {
      "epoch": 19.06,
      "learning_rate": 9.43738656987296e-07,
      "loss": 0.7469,
      "step": 21000
    },
    {
      "epoch": 19.51,
      "learning_rate": 4.900181488203267e-07,
      "loss": 0.7473,
      "step": 21500
    },
    {
      "epoch": 19.96,
      "learning_rate": 3.629764065335753e-08,
      "loss": 0.7475,
      "step": 22000
    }
  ],
  "max_steps": 22040,
  "num_train_epochs": 20,
  "total_flos": 2.622716924469212e+18,
  "trial_name": null,
  "trial_params": null
}