|
{ |
|
"best_metric": 0.2544600938967136, |
|
"best_model_checkpoint": "/content/our_data/checkpoint-10500", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 12410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.91941982272361e-05, |
|
"loss": 2.0809, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5297831784608054, |
|
"eval_f1": 0.013054830287206266, |
|
"eval_loss": 2.0593647956848145, |
|
"eval_precision": 0.5, |
|
"eval_recall": 0.006613756613756613, |
|
"eval_runtime": 1.3043, |
|
"eval_samples_per_second": 233.075, |
|
"eval_steps_per_second": 116.537, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.83883964544722e-05, |
|
"loss": 1.8682, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5528949249463903, |
|
"eval_f1": 0.09097688292319164, |
|
"eval_loss": 1.8005567789077759, |
|
"eval_precision": 0.10427350427350428, |
|
"eval_recall": 0.08068783068783068, |
|
"eval_runtime": 1.4327, |
|
"eval_samples_per_second": 212.189, |
|
"eval_steps_per_second": 106.095, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.75825946817083e-05, |
|
"loss": 1.6332, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.574815344293543, |
|
"eval_f1": 0.14125, |
|
"eval_loss": 1.8355692625045776, |
|
"eval_precision": 0.1338862559241706, |
|
"eval_recall": 0.14947089947089948, |
|
"eval_runtime": 1.8642, |
|
"eval_samples_per_second": 163.074, |
|
"eval_steps_per_second": 81.537, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.67767929089444e-05, |
|
"loss": 1.468, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.5891112699547296, |
|
"eval_f1": 0.15114235500878734, |
|
"eval_loss": 1.6260936260223389, |
|
"eval_precision": 0.13564668769716087, |
|
"eval_recall": 0.17063492063492064, |
|
"eval_runtime": 1.9126, |
|
"eval_samples_per_second": 158.942, |
|
"eval_steps_per_second": 79.471, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.59709911361805e-05, |
|
"loss": 1.401, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.5986418870621872, |
|
"eval_f1": 0.16253968253968254, |
|
"eval_loss": 1.694327473640442, |
|
"eval_precision": 0.1562881562881563, |
|
"eval_recall": 0.1693121693121693, |
|
"eval_runtime": 1.3338, |
|
"eval_samples_per_second": 227.927, |
|
"eval_steps_per_second": 113.964, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.5165189363416601e-05, |
|
"loss": 1.1878, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.5975696926375983, |
|
"eval_f1": 0.16076058772687984, |
|
"eval_loss": 1.6739833354949951, |
|
"eval_precision": 0.11938382541720154, |
|
"eval_recall": 0.24603174603174602, |
|
"eval_runtime": 1.305, |
|
"eval_samples_per_second": 232.956, |
|
"eval_steps_per_second": 116.478, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.4359387590652701e-05, |
|
"loss": 1.1182, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.622706695258518, |
|
"eval_f1": 0.18434203220433093, |
|
"eval_loss": 1.6201189756393433, |
|
"eval_precision": 0.15885167464114833, |
|
"eval_recall": 0.21957671957671956, |
|
"eval_runtime": 1.2763, |
|
"eval_samples_per_second": 238.191, |
|
"eval_steps_per_second": 119.095, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.35535858178888e-05, |
|
"loss": 0.9677, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.6175839885632595, |
|
"eval_f1": 0.1704312114989733, |
|
"eval_loss": 1.6241066455841064, |
|
"eval_precision": 0.13926174496644295, |
|
"eval_recall": 0.21957671957671956, |
|
"eval_runtime": 1.3014, |
|
"eval_samples_per_second": 233.592, |
|
"eval_steps_per_second": 116.796, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.27477840451249e-05, |
|
"loss": 0.9055, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.6157969978556112, |
|
"eval_f1": 0.17582417582417587, |
|
"eval_loss": 1.5932097434997559, |
|
"eval_precision": 0.1316655694535879, |
|
"eval_recall": 0.26455026455026454, |
|
"eval_runtime": 1.5381, |
|
"eval_samples_per_second": 197.65, |
|
"eval_steps_per_second": 98.825, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.1941982272361e-05, |
|
"loss": 0.8772, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.6254467476769121, |
|
"eval_f1": 0.20804710500490678, |
|
"eval_loss": 1.5797325372695923, |
|
"eval_precision": 0.16536661466458658, |
|
"eval_recall": 0.2804232804232804, |
|
"eval_runtime": 1.8044, |
|
"eval_samples_per_second": 168.477, |
|
"eval_steps_per_second": 84.239, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.11361804995971e-05, |
|
"loss": 0.7224, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": 0.6412913986180605, |
|
"eval_f1": 0.20699172033118673, |
|
"eval_loss": 1.5723158121109009, |
|
"eval_precision": 0.15867418899858957, |
|
"eval_recall": 0.2976190476190476, |
|
"eval_runtime": 1.8259, |
|
"eval_samples_per_second": 166.496, |
|
"eval_steps_per_second": 83.248, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.0330378726833199e-05, |
|
"loss": 0.7498, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.6496306885870861, |
|
"eval_f1": 0.22154779969650987, |
|
"eval_loss": 1.595717191696167, |
|
"eval_precision": 0.17936117936117937, |
|
"eval_recall": 0.2896825396825397, |
|
"eval_runtime": 1.7138, |
|
"eval_samples_per_second": 177.388, |
|
"eval_steps_per_second": 88.694, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 9.5245769540693e-06, |
|
"loss": 0.6632, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.6427209911841791, |
|
"eval_f1": 0.22222222222222224, |
|
"eval_loss": 1.6824833154678345, |
|
"eval_precision": 0.1863799283154122, |
|
"eval_recall": 0.2751322751322751, |
|
"eval_runtime": 1.3164, |
|
"eval_samples_per_second": 230.932, |
|
"eval_steps_per_second": 115.466, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 8.7187751813054e-06, |
|
"loss": 0.6139, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.6508220157255182, |
|
"eval_f1": 0.23450735621934907, |
|
"eval_loss": 1.5827279090881348, |
|
"eval_precision": 0.1768661735036987, |
|
"eval_recall": 0.3478835978835979, |
|
"eval_runtime": 1.3076, |
|
"eval_samples_per_second": 232.479, |
|
"eval_steps_per_second": 116.239, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 7.9129734085415e-06, |
|
"loss": 0.6212, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_accuracy": 0.6526090064331665, |
|
"eval_f1": 0.23380154055278662, |
|
"eval_loss": 1.5536507368087769, |
|
"eval_precision": 0.17780840799448655, |
|
"eval_recall": 0.3412698412698413, |
|
"eval_runtime": 1.9253, |
|
"eval_samples_per_second": 157.894, |
|
"eval_steps_per_second": 78.947, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 7.107171635777599e-06, |
|
"loss": 0.5379, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_accuracy": 0.6535620681439123, |
|
"eval_f1": 0.23245002324500233, |
|
"eval_loss": 1.5670047998428345, |
|
"eval_precision": 0.17921146953405018, |
|
"eval_recall": 0.3306878306878307, |
|
"eval_runtime": 1.8494, |
|
"eval_samples_per_second": 164.376, |
|
"eval_steps_per_second": 82.188, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 0.5376, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_accuracy": 0.6529664045746962, |
|
"eval_f1": 0.2388059701492537, |
|
"eval_loss": 1.6112617254257202, |
|
"eval_precision": 0.1844380403458213, |
|
"eval_recall": 0.3386243386243386, |
|
"eval_runtime": 1.9275, |
|
"eval_samples_per_second": 157.714, |
|
"eval_steps_per_second": 78.857, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.495568090249799e-06, |
|
"loss": 0.5, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_accuracy": 0.6599952346914463, |
|
"eval_f1": 0.22989593188268687, |
|
"eval_loss": 1.6431697607040405, |
|
"eval_precision": 0.17893961708394698, |
|
"eval_recall": 0.32142857142857145, |
|
"eval_runtime": 1.9076, |
|
"eval_samples_per_second": 159.365, |
|
"eval_steps_per_second": 79.682, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 4.689766317485899e-06, |
|
"loss": 0.4928, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"eval_accuracy": 0.660948296402192, |
|
"eval_f1": 0.2414772727272727, |
|
"eval_loss": 1.6421875953674316, |
|
"eval_precision": 0.18805309734513273, |
|
"eval_recall": 0.3373015873015873, |
|
"eval_runtime": 1.2935, |
|
"eval_samples_per_second": 235.02, |
|
"eval_steps_per_second": 117.51, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 3.883964544721999e-06, |
|
"loss": 0.4877, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.6653562068143912, |
|
"eval_f1": 0.254, |
|
"eval_loss": 1.6850905418395996, |
|
"eval_precision": 0.20418006430868169, |
|
"eval_recall": 0.335978835978836, |
|
"eval_runtime": 1.4405, |
|
"eval_samples_per_second": 211.033, |
|
"eval_steps_per_second": 105.516, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 3.0781627719580986e-06, |
|
"loss": 0.4339, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.6636883488205861, |
|
"eval_f1": 0.2544600938967136, |
|
"eval_loss": 1.6376055479049683, |
|
"eval_precision": 0.19723435225618632, |
|
"eval_recall": 0.3584656084656085, |
|
"eval_runtime": 1.8779, |
|
"eval_samples_per_second": 161.879, |
|
"eval_steps_per_second": 80.94, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.2723609991941985e-06, |
|
"loss": 0.4303, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_accuracy": 0.660352632832976, |
|
"eval_f1": 0.2396694214876033, |
|
"eval_loss": 1.6363922357559204, |
|
"eval_precision": 0.18354430379746836, |
|
"eval_recall": 0.34523809523809523, |
|
"eval_runtime": 1.8659, |
|
"eval_samples_per_second": 162.922, |
|
"eval_steps_per_second": 81.461, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 1.4665592264302982e-06, |
|
"loss": 0.4509, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"eval_accuracy": 0.6664284012389802, |
|
"eval_f1": 0.2508507535245503, |
|
"eval_loss": 1.644798994064331, |
|
"eval_precision": 0.19830899308224442, |
|
"eval_recall": 0.3412698412698413, |
|
"eval_runtime": 1.3158, |
|
"eval_samples_per_second": 231.043, |
|
"eval_steps_per_second": 115.521, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 6.607574536663981e-07, |
|
"loss": 0.4114, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_accuracy": 0.6658327376697641, |
|
"eval_f1": 0.2510658455708195, |
|
"eval_loss": 1.6494354009628296, |
|
"eval_precision": 0.19557195571955718, |
|
"eval_recall": 0.3505291005291005, |
|
"eval_runtime": 1.3234, |
|
"eval_samples_per_second": 229.715, |
|
"eval_steps_per_second": 114.857, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 12410, |
|
"total_flos": 256497375844554.0, |
|
"train_loss": 0.8660164156044615, |
|
"train_runtime": 798.2036, |
|
"train_samples_per_second": 31.082, |
|
"train_steps_per_second": 15.547 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12410, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 256497375844554.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|