|
{ |
|
"best_metric": 0.9743472495313, |
|
"best_model_checkpoint": "models/pos_final_mono_fr/checkpoint-560", |
|
"epoch": 39.94915254237288, |
|
"global_step": 560, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.021484027736620635, |
|
"eval_f1": 0.02014146604497732, |
|
"eval_loss": 3.6696622371673584, |
|
"eval_precision": 0.02098896013750248, |
|
"eval_recall": 0.019359756097560975, |
|
"eval_runtime": 1.991, |
|
"eval_samples_per_second": 833.259, |
|
"eval_steps_per_second": 3.516, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.051117169442304274, |
|
"eval_f1": 0.049818939975858664, |
|
"eval_loss": 3.6328794956207275, |
|
"eval_precision": 0.051272950211351684, |
|
"eval_recall": 0.048445121951219514, |
|
"eval_runtime": 1.9795, |
|
"eval_samples_per_second": 838.108, |
|
"eval_steps_per_second": 3.536, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.12674094707520892, |
|
"eval_f1": 0.11132299612354632, |
|
"eval_loss": 3.5738770961761475, |
|
"eval_precision": 0.11422247882986913, |
|
"eval_recall": 0.1085670731707317, |
|
"eval_runtime": 2.9806, |
|
"eval_samples_per_second": 556.603, |
|
"eval_steps_per_second": 2.349, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_accuracy": 0.30611035381971197, |
|
"eval_f1": 0.22206610578982539, |
|
"eval_loss": 3.4791259765625, |
|
"eval_precision": 0.2535112084816713, |
|
"eval_recall": 0.1975609756097561, |
|
"eval_runtime": 1.9318, |
|
"eval_samples_per_second": 858.799, |
|
"eval_steps_per_second": 3.624, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.3788300835654596, |
|
"eval_f1": 0.25393978707978787, |
|
"eval_loss": 3.3377487659454346, |
|
"eval_precision": 0.339298460283471, |
|
"eval_recall": 0.20289634146341465, |
|
"eval_runtime": 1.9526, |
|
"eval_samples_per_second": 849.649, |
|
"eval_steps_per_second": 3.585, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_accuracy": 0.34273691696793696, |
|
"eval_f1": 0.20375216215017516, |
|
"eval_loss": 3.188615560531616, |
|
"eval_precision": 0.3736782170164308, |
|
"eval_recall": 0.1400609756097561, |
|
"eval_runtime": 1.9288, |
|
"eval_samples_per_second": 860.111, |
|
"eval_steps_per_second": 3.629, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_accuracy": 0.4599656255556214, |
|
"eval_f1": 0.36921620863712845, |
|
"eval_loss": 3.0504870414733887, |
|
"eval_precision": 0.434243073878628, |
|
"eval_recall": 0.3211280487804878, |
|
"eval_runtime": 1.9835, |
|
"eval_samples_per_second": 836.412, |
|
"eval_steps_per_second": 3.529, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_accuracy": 0.5282403840455165, |
|
"eval_f1": 0.4701918608510921, |
|
"eval_loss": 2.8996212482452393, |
|
"eval_precision": 0.5159915488853272, |
|
"eval_recall": 0.431859756097561, |
|
"eval_runtime": 2.2513, |
|
"eval_samples_per_second": 736.924, |
|
"eval_steps_per_second": 3.109, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_accuracy": 0.573164226871333, |
|
"eval_f1": 0.5221832422289304, |
|
"eval_loss": 2.748504161834717, |
|
"eval_precision": 0.5617342460944357, |
|
"eval_recall": 0.48783536585365855, |
|
"eval_runtime": 1.9301, |
|
"eval_samples_per_second": 859.535, |
|
"eval_steps_per_second": 3.627, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.6246369940141053, |
|
"eval_f1": 0.5703931402685649, |
|
"eval_loss": 2.586193084716797, |
|
"eval_precision": 0.6076525336091003, |
|
"eval_recall": 0.5374390243902439, |
|
"eval_runtime": 2.1049, |
|
"eval_samples_per_second": 788.176, |
|
"eval_steps_per_second": 3.326, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_accuracy": 0.6886742132400877, |
|
"eval_f1": 0.6548560582094275, |
|
"eval_loss": 2.420483112335205, |
|
"eval_precision": 0.6804733727810651, |
|
"eval_recall": 0.6310975609756098, |
|
"eval_runtime": 2.1866, |
|
"eval_samples_per_second": 758.715, |
|
"eval_steps_per_second": 3.201, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"eval_accuracy": 0.7838854975404492, |
|
"eval_f1": 0.7690782646407386, |
|
"eval_loss": 2.260331869125366, |
|
"eval_precision": 0.7816327173125964, |
|
"eval_recall": 0.7569207317073171, |
|
"eval_runtime": 2.2785, |
|
"eval_samples_per_second": 728.116, |
|
"eval_steps_per_second": 3.072, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_accuracy": 0.83701772061874, |
|
"eval_f1": 0.8335373317013463, |
|
"eval_loss": 2.1123812198638916, |
|
"eval_precision": 0.8366093366093366, |
|
"eval_recall": 0.8304878048780487, |
|
"eval_runtime": 2.9322, |
|
"eval_samples_per_second": 565.792, |
|
"eval_steps_per_second": 2.387, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_accuracy": 0.8735553843418479, |
|
"eval_f1": 0.8685824105426924, |
|
"eval_loss": 1.9825972318649292, |
|
"eval_precision": 0.8690861363775105, |
|
"eval_recall": 0.8680792682926829, |
|
"eval_runtime": 1.9874, |
|
"eval_samples_per_second": 834.748, |
|
"eval_steps_per_second": 3.522, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"eval_accuracy": 0.9239613583832158, |
|
"eval_f1": 0.9204770765335692, |
|
"eval_loss": 1.8721418380737305, |
|
"eval_precision": 0.9209546481108466, |
|
"eval_recall": 0.92, |
|
"eval_runtime": 1.9558, |
|
"eval_samples_per_second": 848.258, |
|
"eval_steps_per_second": 3.579, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"eval_accuracy": 0.9416819771232146, |
|
"eval_f1": 0.9391052511241521, |
|
"eval_loss": 1.7779291868209839, |
|
"eval_precision": 0.9390336838896509, |
|
"eval_recall": 0.9391768292682927, |
|
"eval_runtime": 1.9975, |
|
"eval_samples_per_second": 830.55, |
|
"eval_steps_per_second": 3.504, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy": 0.9466010786463581, |
|
"eval_f1": 0.944723771216138, |
|
"eval_loss": 1.698561429977417, |
|
"eval_precision": 0.9442346348297497, |
|
"eval_recall": 0.9452134146341463, |
|
"eval_runtime": 1.9516, |
|
"eval_samples_per_second": 850.075, |
|
"eval_steps_per_second": 3.587, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"eval_accuracy": 0.9486161322823445, |
|
"eval_f1": 0.9471599219892736, |
|
"eval_loss": 1.6294448375701904, |
|
"eval_precision": 0.9466983430799221, |
|
"eval_recall": 0.9476219512195122, |
|
"eval_runtime": 1.9621, |
|
"eval_samples_per_second": 845.505, |
|
"eval_steps_per_second": 3.568, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"eval_accuracy": 0.9498607242339833, |
|
"eval_f1": 0.9487042764210301, |
|
"eval_loss": 1.5666829347610474, |
|
"eval_precision": 0.948140929991778, |
|
"eval_recall": 0.9492682926829268, |
|
"eval_runtime": 2.1244, |
|
"eval_samples_per_second": 780.914, |
|
"eval_steps_per_second": 3.295, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"eval_accuracy": 0.9523499081372607, |
|
"eval_f1": 0.9514249592542383, |
|
"eval_loss": 1.5073306560516357, |
|
"eval_precision": 0.9506864326808925, |
|
"eval_recall": 0.9521646341463414, |
|
"eval_runtime": 1.9609, |
|
"eval_samples_per_second": 846.058, |
|
"eval_steps_per_second": 3.57, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_accuracy": 0.9551650565993006, |
|
"eval_f1": 0.9544048870405071, |
|
"eval_loss": 1.4499109983444214, |
|
"eval_precision": 0.953780105349694, |
|
"eval_recall": 0.9550304878048781, |
|
"eval_runtime": 2.9847, |
|
"eval_samples_per_second": 555.827, |
|
"eval_steps_per_second": 2.345, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"eval_accuracy": 0.9562911159841166, |
|
"eval_f1": 0.9558884029925795, |
|
"eval_loss": 1.3926490545272827, |
|
"eval_precision": 0.9554662036613969, |
|
"eval_recall": 0.9563109756097561, |
|
"eval_runtime": 2.0934, |
|
"eval_samples_per_second": 792.487, |
|
"eval_steps_per_second": 3.344, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"eval_accuracy": 0.9612102175072601, |
|
"eval_f1": 0.961168007802975, |
|
"eval_loss": 1.3373351097106934, |
|
"eval_precision": 0.9609336908824964, |
|
"eval_recall": 0.9614024390243903, |
|
"eval_runtime": 2.5845, |
|
"eval_samples_per_second": 641.892, |
|
"eval_steps_per_second": 2.708, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_accuracy": 0.9622770106086648, |
|
"eval_f1": 0.9623058515097475, |
|
"eval_loss": 1.2815097570419312, |
|
"eval_precision": 0.962203188343951, |
|
"eval_recall": 0.9624085365853658, |
|
"eval_runtime": 1.9491, |
|
"eval_samples_per_second": 851.178, |
|
"eval_steps_per_second": 3.591, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_accuracy": 0.9646180288034137, |
|
"eval_f1": 0.9648307087214354, |
|
"eval_loss": 1.2245593070983887, |
|
"eval_precision": 0.9648748361130591, |
|
"eval_recall": 0.9647865853658537, |
|
"eval_runtime": 2.9881, |
|
"eval_samples_per_second": 555.195, |
|
"eval_steps_per_second": 2.343, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"eval_accuracy": 0.9647661945119421, |
|
"eval_f1": 0.9652270683110508, |
|
"eval_loss": 1.16820228099823, |
|
"eval_precision": 0.9652712138305333, |
|
"eval_recall": 0.9651829268292683, |
|
"eval_runtime": 2.9793, |
|
"eval_samples_per_second": 556.84, |
|
"eval_steps_per_second": 2.35, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"eval_accuracy": 0.9660996858886979, |
|
"eval_f1": 0.9654436860068261, |
|
"eval_loss": 1.1113808155059814, |
|
"eval_precision": 0.9649731968810916, |
|
"eval_recall": 0.9659146341463415, |
|
"eval_runtime": 2.0371, |
|
"eval_samples_per_second": 814.377, |
|
"eval_steps_per_second": 3.436, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"eval_accuracy": 0.9699223611687311, |
|
"eval_f1": 0.9672198601014949, |
|
"eval_loss": 1.0521485805511475, |
|
"eval_precision": 0.9669398823852037, |
|
"eval_recall": 0.9675, |
|
"eval_runtime": 2.1723, |
|
"eval_samples_per_second": 763.722, |
|
"eval_steps_per_second": 3.222, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_accuracy": 0.9706928228530789, |
|
"eval_f1": 0.9678383074718775, |
|
"eval_loss": 0.9949794411659241, |
|
"eval_precision": 0.9677498018655124, |
|
"eval_recall": 0.9679268292682927, |
|
"eval_runtime": 2.9777, |
|
"eval_samples_per_second": 557.141, |
|
"eval_steps_per_second": 2.351, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"eval_accuracy": 0.9716410833876608, |
|
"eval_f1": 0.9688157293095565, |
|
"eval_loss": 0.9363731741905212, |
|
"eval_precision": 0.968668088997257, |
|
"eval_recall": 0.9689634146341464, |
|
"eval_runtime": 2.0063, |
|
"eval_samples_per_second": 826.911, |
|
"eval_steps_per_second": 3.489, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 30.95, |
|
"eval_accuracy": 0.9720559473715403, |
|
"eval_f1": 0.9692110718205097, |
|
"eval_loss": 0.8799633383750916, |
|
"eval_precision": 0.9690929041697147, |
|
"eval_recall": 0.969329268292683, |
|
"eval_runtime": 1.9828, |
|
"eval_samples_per_second": 836.683, |
|
"eval_steps_per_second": 3.53, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 31.95, |
|
"eval_accuracy": 0.9726189770639483, |
|
"eval_f1": 0.9695962936434156, |
|
"eval_loss": 0.8233166337013245, |
|
"eval_precision": 0.9693451564737788, |
|
"eval_recall": 0.9698475609756098, |
|
"eval_runtime": 1.9361, |
|
"eval_samples_per_second": 856.891, |
|
"eval_steps_per_second": 3.616, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_accuracy": 0.9733301724648847, |
|
"eval_f1": 0.9703057833602635, |
|
"eval_loss": 0.7679479122161865, |
|
"eval_precision": 0.9702762026705689, |
|
"eval_recall": 0.9703353658536585, |
|
"eval_runtime": 1.9498, |
|
"eval_samples_per_second": 850.85, |
|
"eval_steps_per_second": 3.59, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 33.95, |
|
"eval_accuracy": 0.9737450364487643, |
|
"eval_f1": 0.9711132452249204, |
|
"eval_loss": 0.7146441340446472, |
|
"eval_precision": 0.9710984421206671, |
|
"eval_recall": 0.9711280487804878, |
|
"eval_runtime": 1.9736, |
|
"eval_samples_per_second": 840.614, |
|
"eval_steps_per_second": 3.547, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 34.95, |
|
"eval_accuracy": 0.9749599952586974, |
|
"eval_f1": 0.9722899646384587, |
|
"eval_loss": 0.6641064286231995, |
|
"eval_precision": 0.9721714216044867, |
|
"eval_recall": 0.9724085365853659, |
|
"eval_runtime": 1.9368, |
|
"eval_samples_per_second": 856.569, |
|
"eval_steps_per_second": 3.614, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 35.68, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0937, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 35.95, |
|
"eval_accuracy": 0.9755230249511053, |
|
"eval_f1": 0.9729268292682927, |
|
"eval_loss": 0.6186906099319458, |
|
"eval_precision": 0.9729268292682927, |
|
"eval_recall": 0.9729268292682927, |
|
"eval_runtime": 1.993, |
|
"eval_samples_per_second": 832.399, |
|
"eval_steps_per_second": 3.512, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"eval_accuracy": 0.9756119243762224, |
|
"eval_f1": 0.9729490833168234, |
|
"eval_loss": 0.5833659172058105, |
|
"eval_precision": 0.9726970777341012, |
|
"eval_recall": 0.9732012195121951, |
|
"eval_runtime": 2.9665, |
|
"eval_samples_per_second": 559.252, |
|
"eval_steps_per_second": 2.36, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 37.95, |
|
"eval_accuracy": 0.9761749540686303, |
|
"eval_f1": 0.9736946383393786, |
|
"eval_loss": 0.5605461597442627, |
|
"eval_precision": 0.97348692631194, |
|
"eval_recall": 0.9739024390243902, |
|
"eval_runtime": 2.0134, |
|
"eval_samples_per_second": 823.972, |
|
"eval_steps_per_second": 3.477, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"eval_accuracy": 0.9764712854856872, |
|
"eval_f1": 0.9739244403127238, |
|
"eval_loss": 0.5465701222419739, |
|
"eval_precision": 0.9736721821007405, |
|
"eval_recall": 0.9741768292682926, |
|
"eval_runtime": 2.9641, |
|
"eval_samples_per_second": 559.689, |
|
"eval_steps_per_second": 2.362, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"eval_accuracy": 0.9768268831861554, |
|
"eval_f1": 0.9743472495313, |
|
"eval_loss": 0.5415592193603516, |
|
"eval_precision": 0.974243301734386, |
|
"eval_recall": 0.9744512195121952, |
|
"eval_runtime": 1.9918, |
|
"eval_samples_per_second": 832.894, |
|
"eval_steps_per_second": 3.514, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"step": 560, |
|
"total_flos": 3.723450094214784e+16, |
|
"train_loss": 1.9360494545527867, |
|
"train_runtime": 526.0611, |
|
"train_samples_per_second": 1135.077, |
|
"train_steps_per_second": 1.065 |
|
} |
|
], |
|
"max_steps": 560, |
|
"num_train_epochs": 40, |
|
"total_flos": 3.723450094214784e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|