{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.3472626289341444, "precision": 0.3978787878787879, "recall": 0.3080713280150164, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.5334239746204397, "precision": 0.5235765124555161, "recall": 0.54364896073903, "support": 2165.0 }, "eval_O": { "f1-score": 0.8390533194223273, "precision": 0.9157377442167086, "recall": 0.7742197000405351, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8562600940945159, "precision": 0.7896134170821731, "recall": 0.9351944167497508, "support": 13039.0 }, "eval_accuracy": 0.7610281584509443, "eval_loss": 0.6209574341773987, "eval_macro avg": { "f1-score": 0.6440000042678569, "precision": 0.6567016154082965, "recall": 0.6402836013860831, "support": 29334.0 }, "eval_runtime": 1.3904, "eval_samples_per_second": 57.539, "eval_steps_per_second": 7.192, "eval_weighted avg": { "f1-score": 0.7526914076678427, "precision": 0.7554909643645776, "recall": 0.7610281584509443, "support": 29334.0 }, "step": 41 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.4196301564722618, "precision": 0.5328757225433526, "recall": 0.34608165180666356, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.6500777604976672, "precision": 0.6262842465753424, "recall": 0.6757505773672056, "support": 2165.0 }, "eval_O": { "f1-score": 0.8802794869120867, "precision": 0.9066595059076262, "recall": 0.8553911633563032, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8765692621338388, "precision": 0.821313672922252, "recall": 0.9397959966255081, "support": 13039.0 }, "eval_accuracy": 0.8056521442694484, "eval_loss": 0.5057439804077148, "eval_macro avg": { "f1-score": 0.7066391665039636, "precision": 0.7217832869871433, "recall": 0.7042548472889201, "support": 29334.0 }, "eval_runtime": 1.3936, "eval_samples_per_second": 57.406, "eval_steps_per_second": 7.176, "eval_weighted avg": { "f1-score": 0.7947114837449316, "precision": 0.7937221895699558, "recall": 0.8056521442694484, "support": 29334.0 }, "step": 82 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.5597184377838329, "precision": 0.542234931808183, "recall": 0.5783669638667293, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7123406094661768, "precision": 0.669374492282697, "recall": 0.7612009237875289, "support": 2165.0 }, "eval_O": { "f1-score": 0.896587330270019, "precision": 0.9139037996000421, "recall": 0.8799148763680583, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.879910300030931, "precision": 0.8872514619883041, "recall": 0.8726896234373802, "support": 13039.0 }, "eval_accuracy": 0.8241289970682485, "eval_loss": 0.47074389457702637, "eval_macro avg": { "f1-score": 0.76213916938774, "precision": 0.7531911714198065, "recall": 0.7730430968649242, "support": 29334.0 }, "eval_runtime": 1.3982, "eval_samples_per_second": 57.218, "eval_steps_per_second": 7.152, "eval_weighted avg": { "f1-score": 0.8266316076408545, "precision": 0.8300087121591747, "recall": 0.8241289970682485, "support": 29334.0 }, "step": 123 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.5578872907333177, "precision": 0.5606635071090047, "recall": 0.5551384326607227, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7178253548231899, "precision": 0.748995983935743, "recall": 0.6891454965357968, "support": 2165.0 }, "eval_O": { "f1-score": 0.8866524874202418, "precision": 0.9082793070464449, "recall": 0.8660316173490069, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8822517942583731, "precision": 0.8605702617953767, "recall": 0.9050540685635402, "support": 13039.0 }, "eval_accuracy": 0.8251517010977023, "eval_loss": 0.49949103593826294, "eval_macro avg": { "f1-score": 0.7611542318087806, "precision": 0.7696272649716422, "recall": 0.7538424037772666, "support": 29334.0 }, "eval_runtime": 1.3933, "eval_samples_per_second": 57.419, "eval_steps_per_second": 7.177, "eval_weighted avg": { "f1-score": 0.8244690603905188, "precision": 0.8248108003682995, "recall": 0.8251517010977023, "support": 29334.0 }, "step": 164 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.5622098421541318, "precision": 0.5562700964630225, "recall": 0.5682778038479587, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7078507078507079, "precision": 0.7994186046511628, "recall": 0.6351039260969977, "support": 2165.0 }, "eval_O": { "f1-score": 0.8867608581894296, "precision": 0.9167929019692708, "recall": 0.858633968382651, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8842074139778985, "precision": 0.8533314310172635, "recall": 0.9174016412301557, "support": 13039.0 }, "eval_accuracy": 0.8260721347242108, "eval_loss": 0.5355645418167114, "eval_macro avg": { "f1-score": 0.760257205543042, "precision": 0.7814532585251799, "recall": 0.7448543348894408, "support": 29334.0 }, "eval_runtime": 1.3973, "eval_samples_per_second": 57.254, "eval_steps_per_second": 7.157, "eval_weighted avg": { "f1-score": 0.8252666444817892, "precision": 0.827540237126271, "recall": 0.8260721347242108, "support": 29334.0 }, "step": 205 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.5778948628906718, "precision": 0.5901198337001712, "recall": 0.5661661191928672, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7630429786256032, "precision": 0.7593778591033852, "recall": 0.766743648960739, "support": 2165.0 }, "eval_O": { "f1-score": 0.8932349450436038, "precision": 0.909998948585848, "recall": 0.877077421970004, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8871252867942979, "precision": 0.8704605845881311, "recall": 0.9044405245801058, "support": 13039.0 }, "eval_accuracy": 0.8359241835412832, "eval_loss": 0.5402312278747559, "eval_macro avg": { "f1-score": 0.7803245183385442, "precision": 0.7824893064943839, "recall": 0.778606928675929, "support": 29334.0 }, "eval_runtime": 1.3946, "eval_samples_per_second": 57.364, "eval_steps_per_second": 7.17, "eval_weighted avg": { "f1-score": 0.8350939185438606, "precision": 0.8348315600763192, "recall": 0.8359241835412832, "support": 29334.0 }, "step": 246 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.5928237129485181, "precision": 0.5645161290322581, "recall": 0.6241201313937119, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7545109211775878, "precision": 0.776257938446507, "recall": 0.7339491916859122, "support": 2165.0 }, "eval_O": { "f1-score": 0.8982721603108067, "precision": 0.9063338147307612, "recall": 0.8903526550466153, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8843364197530864, "precision": 0.8897601117925626, "recall": 0.8789784492675818, "support": 13039.0 }, "eval_accuracy": 0.835071930183405, "eval_loss": 0.5522010922431946, "eval_macro avg": { "f1-score": 0.7824858035474997, "precision": 0.7842169985005223, "recall": 0.7818501068484554, "support": 29334.0 }, "eval_runtime": 1.3973, "eval_samples_per_second": 57.253, "eval_steps_per_second": 7.157, "eval_weighted avg": { "f1-score": 0.8370881251804593, "precision": 0.8397030872059231, "recall": 0.835071930183405, "support": 29334.0 }, "step": 287 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.5708034520481342, "precision": 0.5921815889029004, "recall": 0.5509150633505396, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7608799617407939, "precision": 0.7887952404561229, "recall": 0.7348729792147806, "support": 2165.0 }, "eval_O": { "f1-score": 0.9017624521072796, "precision": 0.909240754094983, "recall": 0.8944061613295501, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.888622395442962, "precision": 0.868889703187981, "recall": 0.909272183449651, "support": 13039.0 }, "eval_accuracy": 0.8393331969727961, "eval_loss": 0.5863537788391113, "eval_macro avg": { "f1-score": 0.7805170653347924, "precision": 0.7897768216604968, "recall": 0.7723665968361304, "support": 29334.0 }, "eval_runtime": 1.4013, "eval_samples_per_second": 57.089, "eval_steps_per_second": 7.136, "eval_weighted avg": { "f1-score": 0.8374380828176649, "precision": 0.8363489544136171, "recall": 0.8393331969727961, "support": 29334.0 }, "step": 328 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.5834502103786816, "precision": 0.5400439384861194, "recall": 0.6344439230408259, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.739652870493992, "precision": 0.7136109918419923, "recall": 0.7676674364896073, "support": 2165.0 }, "eval_O": { "f1-score": 0.8923944839114083, "precision": 0.9208710651142734, "recall": 0.8656262667207134, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8790807810255813, "precision": 0.8900330136770948, "recall": 0.86839481555334, "support": 13039.0 }, "eval_accuracy": 0.8260380445898957, "eval_loss": 0.6257872581481934, "eval_macro avg": { "f1-score": 0.7736445864524157, "precision": 0.76613975227987, "recall": 0.7840331104511216, "support": 29334.0 }, "eval_runtime": 1.3929, "eval_samples_per_second": 57.435, "eval_steps_per_second": 7.179, "eval_weighted avg": { "f1-score": 0.8303162314135053, "precision": 0.8365354605252965, "recall": 0.8260380445898957, "support": 29334.0 }, "step": 369 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.5916413728694839, "precision": 0.5887546468401487, "recall": 0.5945565462224308, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.747756258856873, "precision": 0.765103914934751, "recall": 0.7311778290993072, "support": 2165.0 }, "eval_O": { "f1-score": 0.8968070337806571, "precision": 0.9102390147166266, "recall": 0.8837657073368463, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8894356334456263, "precision": 0.878101644245142, "recall": 0.9010660326712171, "support": 13039.0 }, "eval_accuracy": 0.8381741324060816, "eval_loss": 0.643328845500946, "eval_macro avg": { "f1-score": 0.78141007473816, "precision": 0.7855498051841671, "recall": 0.7776415288324503, "support": 29334.0 }, "eval_runtime": 1.3951, "eval_samples_per_second": 57.345, "eval_steps_per_second": 7.168, "eval_weighted avg": { "f1-score": 0.8381915478775454, "precision": 0.8385330407446147, "recall": 0.8381741324060816, "support": 29334.0 }, "step": 410 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.5791188895594448, "precision": 0.5963211533681332, "recall": 0.5628812763960582, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7536862460720328, "precision": 0.7905679513184585, "recall": 0.7200923787528868, "support": 2165.0 }, "eval_O": { "f1-score": 0.896530612244898, "precision": 0.9027949034114262, "recall": 0.8903526550466153, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.888538617428507, "precision": 0.8699933857573308, "recall": 0.9078917094869239, "support": 13039.0 }, "eval_accuracy": 0.838003681734506, "eval_loss": 0.691639244556427, "eval_macro avg": { "f1-score": 0.7794685913262207, "precision": 0.7899193484638372, "recall": 0.770304504920621, "support": 29334.0 }, "eval_runtime": 1.3951, "eval_samples_per_second": 57.345, "eval_steps_per_second": 7.168, "eval_weighted avg": { "f1-score": 0.836318079509486, "precision": 0.8354034306270279, "recall": 0.838003681734506, "support": 29334.0 }, "step": 451 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.5808454740864581, "precision": 0.5914396887159533, "recall": 0.5706241201313937, "support": 4262.0 }, "eval_MajorClaim": { "f1-score": 0.7569141193595342, "precision": 0.797138477261114, "recall": 0.7205542725173211, "support": 2165.0 }, "eval_O": { "f1-score": 0.8973055414336554, "precision": 0.9003264639869415, "recall": 0.8943048236724767, "support": 9868.0 }, "eval_Premise": { "f1-score": 0.8841596860614294, "precision": 0.870236945703038, "recall": 0.8985351637395506, "support": 13039.0 }, "eval_accuracy": 0.8363332651530647, "eval_loss": 0.6996743679046631, "eval_macro avg": { "f1-score": 0.7798062052352693, "precision": 0.7897853939167616, "recall": 0.7710045950151856, "support": 29334.0 }, "eval_runtime": 1.3919, "eval_samples_per_second": 57.477, "eval_steps_per_second": 7.185, "eval_weighted avg": { "f1-score": 0.8351214191174802, "precision": 0.8344570068256206, "recall": 0.8363332651530647, "support": 29334.0 }, "step": 492 } ], "logging_steps": 500, "max_steps": 656, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 1725464792721600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }