{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 2484, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 276, "train_eval_accuracy": 0.5786848072562358, "train_eval_f1": 0.5273499989435928, "train_eval_loss": 1.8481072187423706, "train_eval_precision": 0.5941973878901876, "train_eval_recall": 0.572395136508943, "train_loss": 1.8481072187423706, "train_runtime": 268.102, "train_samples_per_second": 32.898, "train_steps_per_second": 1.029 }, { "epoch": 1.0, "eval_accuracy": 0.5494708994708994, "eval_f1": 0.5078140359989309, "eval_loss": 1.8939365148544312, "eval_precision": 0.5816333388253621, "eval_recall": 0.5635339213221277, "eval_runtime": 114.8487, "eval_samples_per_second": 32.913, "eval_steps_per_second": 1.036, "step": 276 }, { "epoch": 2.0, "step": 552, "train_eval_accuracy": 0.8439909297052154, "train_eval_f1": 0.8342492924641463, "train_eval_loss": 0.8016859889030457, "train_eval_precision": 0.8542755916165877, "train_eval_recall": 0.8407044685499563, "train_loss": 0.8016859292984009, "train_runtime": 267.8013, "train_samples_per_second": 32.935, "train_steps_per_second": 1.031 }, { "epoch": 2.0, "eval_accuracy": 0.8277777777777777, "eval_f1": 0.8242008421588933, "eval_loss": 0.8600034117698669, "eval_precision": 0.8391484412975262, "eval_recall": 0.8355373608358867, "eval_runtime": 114.9843, "eval_samples_per_second": 32.874, "eval_steps_per_second": 1.035, "step": 552 }, { "epoch": 3.0, "step": 828, "train_eval_accuracy": 0.9051020408163265, "train_eval_f1": 0.9035552222804545, "train_eval_loss": 0.4452792704105377, "train_eval_precision": 0.9086137959345946, "train_eval_recall": 0.9041820807070131, "train_loss": 0.4452792704105377, "train_runtime": 267.5894, "train_samples_per_second": 32.961, "train_steps_per_second": 1.031 }, { "epoch": 3.0, "eval_accuracy": 0.8804232804232804, "eval_f1": 0.8806958156074617, "eval_loss": 0.5410670638084412, "eval_precision": 0.8871882485699742, "eval_recall": 0.8835379115156438, "eval_runtime": 114.901, "eval_samples_per_second": 32.898, "eval_steps_per_second": 1.036, "step": 828 }, { "epoch": 4.0, "step": 1104, "train_eval_accuracy": 0.9252834467120181, "train_eval_f1": 0.9244941105182266, "train_eval_loss": 0.3236485421657562, "train_eval_precision": 0.9275441941761358, "train_eval_recall": 0.924775463750757, "train_loss": 0.32364851236343384, "train_runtime": 268.0076, "train_samples_per_second": 32.91, "train_steps_per_second": 1.03 }, { "epoch": 4.0, "eval_accuracy": 0.8992063492063492, "eval_f1": 0.8994418745237454, "eval_loss": 0.44155266880989075, "eval_precision": 0.9029497697773854, "eval_recall": 0.9012117948893439, "eval_runtime": 114.5928, "eval_samples_per_second": 32.986, "eval_steps_per_second": 1.038, "step": 1104 }, { "epoch": 5.0, "step": 1380, "train_eval_accuracy": 0.9377551020408164, "train_eval_f1": 0.9368640571091438, "train_eval_loss": 0.2548055350780487, "train_eval_precision": 0.9393905662960794, "train_eval_recall": 0.9370066740159024, "train_loss": 0.2548055350780487, "train_runtime": 266.9807, "train_samples_per_second": 33.036, "train_steps_per_second": 1.034 }, { "epoch": 5.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.9049060279992467, "eval_loss": 0.4057652950286865, "eval_precision": 0.9074489392634286, "eval_recall": 0.9071413991393202, "eval_runtime": 114.474, "eval_samples_per_second": 33.021, "eval_steps_per_second": 1.04, "step": 1380 }, { "epoch": 6.0, "step": 1656, "train_eval_accuracy": 0.9470521541950113, "train_eval_f1": 0.9465820465281242, "train_eval_loss": 0.21100522577762604, "train_eval_precision": 0.9475994271224741, "train_eval_recall": 0.9466568764400392, "train_loss": 0.21100524067878723, "train_runtime": 267.4217, "train_samples_per_second": 32.982, "train_steps_per_second": 1.032 }, { "epoch": 6.0, "eval_accuracy": 0.9097883597883598, "eval_f1": 0.9104147506557622, "eval_loss": 0.38457947969436646, "eval_precision": 0.9123207267374824, "eval_recall": 0.9116754514446468, "eval_runtime": 114.6488, "eval_samples_per_second": 32.97, "eval_steps_per_second": 1.038, "step": 1656 }, { "epoch": 7.0, "step": 1932, "train_eval_accuracy": 0.9549886621315192, "train_eval_f1": 0.9546212565256129, "train_eval_loss": 0.1847783774137497, "train_eval_precision": 0.9559007170334742, "train_eval_recall": 0.9545838862429018, "train_loss": 0.1847783923149109, "train_runtime": 266.8425, "train_samples_per_second": 33.053, "train_steps_per_second": 1.034 }, { "epoch": 7.0, "eval_accuracy": 0.9082010582010582, "eval_f1": 0.908576534948754, "eval_loss": 0.38885927200317383, "eval_precision": 0.911182168262048, "eval_recall": 0.9099497041696394, "eval_runtime": 114.6761, "eval_samples_per_second": 32.962, "eval_steps_per_second": 1.038, "step": 1932 }, { "epoch": 8.0, "step": 2208, "train_eval_accuracy": 0.962358276643991, "train_eval_f1": 0.9620445758202029, "train_eval_loss": 0.15657010674476624, "train_eval_precision": 0.9625235916902362, "train_eval_recall": 0.9619882239348216, "train_loss": 0.15657009184360504, "train_runtime": 267.8869, "train_samples_per_second": 32.924, "train_steps_per_second": 1.03 }, { "epoch": 8.0, "eval_accuracy": 0.91005291005291, "eval_f1": 0.9102033809946286, "eval_loss": 0.38644978404045105, "eval_precision": 0.9106144278267643, "eval_recall": 0.9117775212470368, "eval_runtime": 115.022, "eval_samples_per_second": 32.863, "eval_steps_per_second": 1.035, "step": 2208 }, { "epoch": 9.0, "step": 2484, "train_eval_accuracy": 0.9653061224489796, "train_eval_f1": 0.9649036069253767, "train_eval_loss": 0.13657130300998688, "train_eval_precision": 0.966025675321776, "train_eval_recall": 0.9649842543919382, "train_loss": 0.13657130300998688, "train_runtime": 267.8957, "train_samples_per_second": 32.923, "train_steps_per_second": 1.03 }, { "epoch": 9.0, "eval_accuracy": 0.9063492063492063, "eval_f1": 0.9069930228990634, "eval_loss": 0.4001522362232208, "eval_precision": 0.909308614451706, "eval_recall": 0.9081607205793748, "eval_runtime": 114.6165, "eval_samples_per_second": 32.98, "eval_steps_per_second": 1.038, "step": 2484 }, { "epoch": 9.0, "step": 2484, "total_flos": 2.08932565653504e+16, "train_loss": 0.7482529300806411, "train_runtime": 11065.7342, "train_samples_per_second": 15.941, "train_steps_per_second": 0.499 }, { "epoch": 9.0, "eval_accuracy": 0.9063492063492063, "eval_f1": 0.9069930228990634, "eval_loss": 0.4001522362232208, "eval_precision": 0.909308614451706, "eval_recall": 0.9081607205793748, "eval_runtime": 114.8255, "eval_samples_per_second": 32.92, "eval_steps_per_second": 1.036, "step": 2484 }, { "epoch": 9.0, "step": 2484, "train_en_eval_accuracy": 0.9653061224489796, "train_en_eval_f1": 0.9649036069253767, "train_en_eval_loss": 0.13657130300998688, "train_en_eval_precision": 0.966025675321776, "train_en_eval_recall": 0.9649842543919382, "train_en_loss": 0.13657130300998688, "train_en_runtime": 267.45, "train_en_samples_per_second": 32.978, "train_en_steps_per_second": 1.032 }, { "epoch": 9.0, "step": 2484, "test_en_eval_accuracy": 0.9063492063492063, "test_en_eval_f1": 0.9069930228990634, "test_en_eval_loss": 0.40015220642089844, "test_en_eval_precision": 0.909308614451706, "test_en_eval_recall": 0.9081607205793748, "test_en_loss": 0.4001522362232208, "test_en_runtime": 114.5156, "test_en_samples_per_second": 33.009, "test_en_steps_per_second": 1.039 } ], "logging_steps": 500, "max_steps": 5520, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.08932565653504e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }