{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.47523786289338865, "precision": 0.535017852238396, "recall": 0.4274742154926487, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.5895036615134255, "precision": 0.5474121647147714, "recall": 0.6386073159982371, "support": 2269.0 }, "eval_O": { "f1-score": 0.8362336114421931, "precision": 0.845403060609712, "recall": 0.8272609362103526, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8706190412246543, "precision": 0.850072112232857, "recall": 0.8921838447777625, "support": 14534.0 }, "eval_accuracy": 0.7834858081163499, "eval_loss": 0.5869407653808594, "eval_macro avg": { "f1-score": 0.6928985442684154, "precision": 0.694476297448934, "recall": 0.6963815781197502, "support": 29841.0 }, "eval_runtime": 1.4026, "eval_samples_per_second": 57.038, "eval_steps_per_second": 7.13, "eval_weighted avg": { "f1-score": 0.7790930985214805, "precision": 0.7776202536983177, "recall": 0.7834858081163499, "support": 29841.0 }, "step": 41 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.5317480394636985, "precision": 0.6276500447894894, "recall": 0.4612683783190696, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7102803738317758, "precision": 0.6855268552685527, "recall": 0.7368884971353019, "support": 2269.0 }, "eval_O": { "f1-score": 0.8735280263777673, "precision": 0.872397977184523, "recall": 0.8746610069567268, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.884589815184151, "precision": 0.8556913183279743, "recall": 0.9155084629145452, "support": 14534.0 }, "eval_accuracy": 0.820951040514728, "eval_loss": 0.48614954948425293, "eval_macro avg": { "f1-score": 0.7500365637143481, "precision": 0.7603165488926349, "recall": 0.7470815863314109, "support": 29841.0 }, "eval_runtime": 1.3997, "eval_samples_per_second": 57.154, "eval_steps_per_second": 7.144, "eval_weighted avg": { "f1-score": 0.8143098940939201, "precision": 0.8126767385071133, "recall": 0.820951040514728, "support": 29841.0 }, "step": 82 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.5762534088525278, "precision": 0.5519389190275267, "recall": 0.6028088654816766, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7533490937746257, "precision": 0.6811542572141076, "recall": 0.8426619656236227, "support": 2269.0 }, "eval_O": { "f1-score": 0.8863363002165023, "precision": 0.9045047256658892, "recall": 0.868883386393114, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8715370552664885, "precision": 0.8910855499640546, "recall": 0.8528278519333975, "support": 14534.0 }, "eval_accuracy": 0.8184377199155525, "eval_loss": 0.46511921286582947, "eval_macro avg": { "f1-score": 0.7718689645275361, "precision": 0.7571708629678946, "recall": 0.7917955173579527, "support": 29841.0 }, "eval_runtime": 1.4011, "eval_samples_per_second": 57.099, "eval_steps_per_second": 7.137, "eval_weighted avg": { "f1-score": 0.8216639389194362, "precision": 0.8271460951435015, "recall": 0.8184377199155525, "support": 29841.0 }, "step": 123 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.5960000000000001, "precision": 0.5727291118753793, "recall": 0.6212420452051789, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7671584348941629, "precision": 0.7450166112956811, "recall": 0.7906566769501984, "support": 2269.0 }, "eval_O": { "f1-score": 0.8945798982634625, "precision": 0.8872651356993737, "recall": 0.9020162716660771, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8770509119076122, "precision": 0.8981107585809057, "recall": 0.8569561029310582, "support": 14534.0 }, "eval_accuracy": 0.8287255789015113, "eval_loss": 0.46847572922706604, "eval_macro avg": { "f1-score": 0.7836973112663095, "precision": 0.7757804043628349, "recall": 0.792717774188128, "support": 29841.0 }, "eval_runtime": 1.3906, "eval_samples_per_second": 57.529, "eval_steps_per_second": 7.191, "eval_weighted avg": { "f1-score": 0.8307578351802057, "precision": 0.8336988249364055, "recall": 0.8287255789015113, "support": 29841.0 }, "step": 164 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.5957255343082115, "precision": 0.6111239326102008, "recall": 0.5810840465218345, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7869718309859155, "precision": 0.7859340659340659, "recall": 0.7880123402379903, "support": 2269.0 }, "eval_O": { "f1-score": 0.8924617196702003, "precision": 0.8915166490175315, "recall": 0.8934087961325315, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8858138581385815, "precision": 0.8798018189222208, "recall": 0.8919086280445852, "support": 14534.0 }, "eval_accuracy": 0.8369692704668074, "eval_loss": 0.4714011251926422, "eval_macro avg": { "f1-score": 0.7902432357757272, "precision": 0.7920941166210047, "recall": 0.7886034527342354, "support": 29841.0 }, "eval_runtime": 1.4, "eval_samples_per_second": 57.143, "eval_steps_per_second": 7.143, "eval_weighted avg": { "f1-score": 0.8358884354766487, "precision": 0.8349642603479214, "recall": 0.8369692704668074, "support": 29841.0 }, "step": 205 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.5968608901311546, "precision": 0.5850368809272919, "recall": 0.6091727013385999, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8127323420074349, "precision": 0.8594594594594595, "recall": 0.7708241516086382, "support": 2269.0 }, "eval_O": { "f1-score": 0.8972979364985514, "precision": 0.8999051233396584, "recall": 0.8947058129937507, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8825566642663649, "precision": 0.8796910246770114, "recall": 0.8854410348149168, "support": 14534.0 }, "eval_accuracy": 0.8371703361147415, "eval_loss": 0.5037193298339844, "eval_macro avg": { "f1-score": 0.7973619582258764, "precision": 0.8060231221008552, "recall": 0.7900359251889764, "support": 29841.0 }, "eval_runtime": 1.4008, "eval_samples_per_second": 57.109, "eval_steps_per_second": 7.139, "eval_weighted avg": { "f1-score": 0.8378086229762441, "precision": 0.8389012192486348, "recall": 0.8371703361147415, "support": 29841.0 }, "step": 246 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6057510824913955, "precision": 0.6130337078651685, "recall": 0.5986394557823129, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7914081145584726, "precision": 0.8630921395106715, "recall": 0.7307183781401498, "support": 2269.0 }, "eval_O": { "f1-score": 0.8969556393157437, "precision": 0.8824737562756733, "recall": 0.9119207640608419, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8857729138166894, "precision": 0.880592955256358, "recall": 0.8910141736617586, "support": 14534.0 }, "eval_accuracy": 0.8401192989511075, "eval_loss": 0.5329757928848267, "eval_macro avg": { "f1-score": 0.7949719375455753, "precision": 0.8097981397269679, "recall": 0.7830731929112658, "support": 29841.0 }, "eval_runtime": 1.3907, "eval_samples_per_second": 57.526, "eval_steps_per_second": 7.191, "eval_weighted avg": { "f1-score": 0.8390140076168712, "precision": 0.8389379916879856, "recall": 0.8401192989511075, "support": 29841.0 }, "step": 287 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.6007012930089853, "precision": 0.599912453490917, "recall": 0.6014922097871407, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8150046598322461, "precision": 0.8645575877409788, "recall": 0.7708241516086382, "support": 2269.0 }, "eval_O": { "f1-score": 0.8958295721249322, "precision": 0.9148230088495575, "recall": 0.8776087725504068, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8864303302189092, "precision": 0.8694501422616291, "recall": 0.9040869684876841, "support": 14534.0 }, "eval_accuracy": 0.8402198317750745, "eval_loss": 0.5759353637695312, "eval_macro avg": { "f1-score": 0.7994914637962682, "precision": 0.8121857980857706, "recall": 0.7885030256084674, "support": 29841.0 }, "eval_runtime": 1.4002, "eval_samples_per_second": 57.137, "eval_steps_per_second": 7.142, "eval_weighted avg": { "f1-score": 0.8400372100799064, "precision": 0.8408124567818104, "recall": 0.8402198317750745, "support": 29841.0 }, "step": 328 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6078538018057218, "precision": 0.6026747195858498, "recall": 0.6131226684222076, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8104317335086566, "precision": 0.8060156931124673, "recall": 0.8148964301454386, "support": 2269.0 }, "eval_O": { "f1-score": 0.8967088304058509, "precision": 0.9120731707317074, "recall": 0.8818535550053059, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8858266370319713, "precision": 0.8804975868397797, "recall": 0.8912205862116417, "support": 14534.0 }, "eval_accuracy": 0.8402868536577193, "eval_loss": 0.597597062587738, "eval_macro avg": { "f1-score": 0.8002052506880502, "precision": 0.800315292567451, "recall": 0.8002733099461484, "support": 29841.0 }, "eval_runtime": 1.4006, "eval_samples_per_second": 57.118, "eval_steps_per_second": 7.14, "eval_weighted avg": { "f1-score": 0.8407376197665799, "precision": 0.8413820848138425, "recall": 0.8402868536577193, "support": 29841.0 }, "step": 369 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6166648417825469, "precision": 0.6153846153846154, "recall": 0.6179504059688391, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8066597294484912, "precision": 0.7641955835962145, "recall": 0.8541207580431909, "support": 2269.0 }, "eval_O": { "f1-score": 0.8995083343326538, "precision": 0.9150908869098451, "recall": 0.8844475887277443, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8894240693593889, "precision": 0.8894852738783374, "recall": 0.8893628732626944, "support": 14534.0 }, "eval_accuracy": 0.8438390134378875, "eval_loss": 0.6327010989189148, "eval_macro avg": { "f1-score": 0.8030642437307701, "precision": 0.7960390899422531, "recall": 0.8114704065006171, "support": 29841.0 }, "eval_runtime": 1.3944, "eval_samples_per_second": 57.373, "eval_steps_per_second": 7.172, "eval_weighted avg": { "f1-score": 0.8443440976397001, "precision": 0.8453782465037248, "recall": 0.8438390134378875, "support": 29841.0 }, "step": 410 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6205015213513796, "precision": 0.5944913550462404, "recall": 0.6488918147904323, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8115818607621886, "precision": 0.78500823723229, "recall": 0.8400176289114147, "support": 2269.0 }, "eval_O": { "f1-score": 0.8931679980922858, "precision": 0.9032919329555047, "recall": 0.8832684824902723, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8841004184100418, "precision": 0.8962250812950657, "recall": 0.872299435805697, "support": 14534.0 }, "eval_accuracy": 0.8388458831808585, "eval_loss": 0.6347343325614929, "eval_macro avg": { "f1-score": 0.802337949653974, "precision": 0.7947541516322751, "recall": 0.8111193404994541, "support": 29841.0 }, "eval_runtime": 1.3894, "eval_samples_per_second": 57.578, "eval_steps_per_second": 7.197, "eval_weighted avg": { "f1-score": 0.8409094181783406, "precision": 0.843699440707882, "recall": 0.8388458831808585, "support": 29841.0 }, "step": 451 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.6289669861554845, "precision": 0.6110076557003932, "recall": 0.6480140443274084, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8147826086956521, "precision": 0.803946803946804, "recall": 0.8259144997796386, "support": 2269.0 }, "eval_O": { "f1-score": 0.9003071107961257, "precision": 0.901905099988167, "recall": 0.8987147742011555, "support": 8481.0 }, "eval_Premise": { "f1-score": 0.8883866481223922, "precision": 0.8980036552790664, "recall": 0.8789734415852484, "support": 14534.0 }, "eval_accuracy": 0.8452799839147481, "eval_loss": 0.6512799859046936, "eval_macro avg": { "f1-score": 0.8081108384424137, "precision": 0.8037158037286076, "recall": 0.8129041899733627, "support": 29841.0 }, "eval_runtime": 1.3915, "eval_samples_per_second": 57.493, "eval_steps_per_second": 7.187, "eval_weighted avg": { "f1-score": 0.8465621274593268, "precision": 0.8481337577161484, "recall": 0.8452799839147481, "support": 29841.0 }, "step": 492 } ], "logging_steps": 500, "max_steps": 656, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 1725464792721600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }