{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 3240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 270, "train_eval_accuracy": 0.4543554006968641, "train_eval_f1": 0.3920024252155857, "train_eval_loss": 2.213092565536499, "train_eval_precision": 0.5049415352292027, "train_eval_recall": 0.4502057276533155, "train_loss": 2.213092565536499, "train_runtime": 135.647, "train_samples_per_second": 63.474, "train_steps_per_second": 1.99 }, { "epoch": 1.0, "eval_accuracy": 0.4268292682926829, "eval_f1": 0.3705093335850676, "eval_loss": 2.241243839263916, "eval_precision": 0.4561282093493303, "eval_recall": 0.4361469763776722, "eval_runtime": 58.1481, "eval_samples_per_second": 63.459, "eval_steps_per_second": 1.995, "step": 270 }, { "epoch": 2.0, "step": 540, "train_eval_accuracy": 0.7466898954703832, "train_eval_f1": 0.7142827956029404, "train_eval_loss": 1.0307879447937012, "train_eval_precision": 0.7636517145518492, "train_eval_recall": 0.7488504127171401, "train_loss": 1.0307878255844116, "train_runtime": 135.0619, "train_samples_per_second": 63.749, "train_steps_per_second": 1.999 }, { "epoch": 2.0, "eval_accuracy": 0.7468834688346884, "eval_f1": 0.7093879817407956, "eval_loss": 1.0710593461990356, "eval_precision": 0.7202966158698229, "eval_recall": 0.7418325179501348, "eval_runtime": 57.9543, "eval_samples_per_second": 63.671, "eval_steps_per_second": 2.002, "step": 540 }, { "epoch": 3.0, "step": 810, "train_eval_accuracy": 0.8695702671312427, "train_eval_f1": 0.861820476818951, "train_eval_loss": 0.5579802989959717, "train_eval_precision": 0.8870817771022802, "train_eval_recall": 0.8687368970385886, "train_loss": 0.5579802989959717, "train_runtime": 135.4146, "train_samples_per_second": 63.583, "train_steps_per_second": 1.994 }, { "epoch": 3.0, "eval_accuracy": 0.8552845528455284, "eval_f1": 0.8495416886177747, "eval_loss": 0.6178643703460693, "eval_precision": 0.8734034389743888, "eval_recall": 0.8576311610486081, "eval_runtime": 58.0397, "eval_samples_per_second": 63.577, "eval_steps_per_second": 1.999, "step": 810 }, { "epoch": 4.0, "step": 1080, "train_eval_accuracy": 0.9222996515679442, "train_eval_f1": 0.9213466868616371, "train_eval_loss": 0.32642877101898193, "train_eval_precision": 0.9241731833923874, "train_eval_recall": 0.9212168308984705, "train_loss": 0.3264288008213043, "train_runtime": 135.2547, "train_samples_per_second": 63.658, "train_steps_per_second": 1.996 }, { "epoch": 4.0, "eval_accuracy": 0.8948509485094851, "eval_f1": 0.896062119626839, "eval_loss": 0.425402969121933, "eval_precision": 0.898102077222129, "eval_recall": 0.8978652555840756, "eval_runtime": 58.1141, "eval_samples_per_second": 63.496, "eval_steps_per_second": 1.996, "step": 1080 }, { "epoch": 5.0, "step": 1350, "train_eval_accuracy": 0.9347270615563299, "train_eval_f1": 0.9341307787821527, "train_eval_loss": 0.2624934911727905, "train_eval_precision": 0.9374908232103657, "train_eval_recall": 0.9338864711477163, "train_loss": 0.2624934911727905, "train_runtime": 135.553, "train_samples_per_second": 63.518, "train_steps_per_second": 1.992 }, { "epoch": 5.0, "eval_accuracy": 0.9013550135501355, "eval_f1": 0.9031622172297773, "eval_loss": 0.3942576050758362, "eval_precision": 0.9068040272106185, "eval_recall": 0.9037180254823993, "eval_runtime": 58.0487, "eval_samples_per_second": 63.567, "eval_steps_per_second": 1.998, "step": 1350 }, { "epoch": 6.0, "step": 1620, "train_eval_accuracy": 0.9369337979094077, "train_eval_f1": 0.9367169561056452, "train_eval_loss": 0.2356746643781662, "train_eval_precision": 0.9409168849056493, "train_eval_recall": 0.9363740919632174, "train_loss": 0.235674649477005, "train_runtime": 135.3099, "train_samples_per_second": 63.632, "train_steps_per_second": 1.995 }, { "epoch": 6.0, "eval_accuracy": 0.8967479674796748, "eval_f1": 0.8985613086912232, "eval_loss": 0.39477989077568054, "eval_precision": 0.9046020298861014, "eval_recall": 0.8985670699751195, "eval_runtime": 58.0672, "eval_samples_per_second": 63.547, "eval_steps_per_second": 1.998, "step": 1620 }, { "epoch": 7.0, "step": 1890, "train_eval_accuracy": 0.9551684088269454, "train_eval_f1": 0.954651454135631, "train_eval_loss": 0.17541687190532684, "train_eval_precision": 0.9557858328831473, "train_eval_recall": 0.9546927481898011, "train_loss": 0.17541685700416565, "train_runtime": 135.3144, "train_samples_per_second": 63.63, "train_steps_per_second": 1.995 }, { "epoch": 7.0, "eval_accuracy": 0.9084010840108401, "eval_f1": 0.9096283406922946, "eval_loss": 0.36582455039024353, "eval_precision": 0.9114465044928155, "eval_recall": 0.9108233562780595, "eval_runtime": 57.881, "eval_samples_per_second": 63.751, "eval_steps_per_second": 2.004, "step": 1890 }, { "epoch": 8.0, "step": 2160, "train_eval_accuracy": 0.9613240418118467, "train_eval_f1": 0.9608151518995849, "train_eval_loss": 0.14420288801193237, "train_eval_precision": 0.9623834153031047, "train_eval_recall": 0.9608262481602913, "train_loss": 0.14420288801193237, "train_runtime": 135.0674, "train_samples_per_second": 63.746, "train_steps_per_second": 1.999 }, { "epoch": 8.0, "eval_accuracy": 0.9102981029810298, "eval_f1": 0.9113509086088316, "eval_loss": 0.3648086190223694, "eval_precision": 0.9130676211210293, "eval_recall": 0.9126224114576494, "eval_runtime": 57.9595, "eval_samples_per_second": 63.665, "eval_steps_per_second": 2.001, "step": 2160 }, { "epoch": 9.0, "step": 2430, "train_eval_accuracy": 0.9665505226480836, "train_eval_f1": 0.9661383014293797, "train_eval_loss": 0.12614580988883972, "train_eval_precision": 0.9666400751855355, "train_eval_recall": 0.9662026462661183, "train_loss": 0.12614580988883972, "train_runtime": 135.3324, "train_samples_per_second": 63.621, "train_steps_per_second": 1.995 }, { "epoch": 9.0, "eval_accuracy": 0.9140921409214092, "eval_f1": 0.9150974073507481, "eval_loss": 0.3741607964038849, "eval_precision": 0.9164572952906177, "eval_recall": 0.9163852090461947, "eval_runtime": 58.0191, "eval_samples_per_second": 63.6, "eval_steps_per_second": 1.999, "step": 2430 }, { "epoch": 10.0, "step": 2700, "train_eval_accuracy": 0.9734030197444832, "train_eval_f1": 0.9731286369086158, "train_eval_loss": 0.10438817739486694, "train_eval_precision": 0.9733845792387448, "train_eval_recall": 0.9731881420490307, "train_loss": 0.10438817739486694, "train_runtime": 135.1816, "train_samples_per_second": 63.692, "train_steps_per_second": 1.997 }, { "epoch": 10.0, "eval_accuracy": 0.9105691056910569, "eval_f1": 0.9117504718333816, "eval_loss": 0.3793221414089203, "eval_precision": 0.9133737833127124, "eval_recall": 0.9125232970571355, "eval_runtime": 57.8562, "eval_samples_per_second": 63.779, "eval_steps_per_second": 2.005, "step": 2700 }, { "epoch": 11.0, "step": 2970, "train_eval_accuracy": 0.9768873403019744, "train_eval_f1": 0.976601423843683, "train_eval_loss": 0.08366803079843521, "train_eval_precision": 0.9770579602162861, "train_eval_recall": 0.9766316554964214, "train_loss": 0.08366803079843521, "train_runtime": 135.1786, "train_samples_per_second": 63.694, "train_steps_per_second": 1.997 }, { "epoch": 11.0, "eval_accuracy": 0.9121951219512195, "eval_f1": 0.9135174979767972, "eval_loss": 0.3827652931213379, "eval_precision": 0.9154501063771586, "eval_recall": 0.9144773585604222, "eval_runtime": 58.1085, "eval_samples_per_second": 63.502, "eval_steps_per_second": 1.996, "step": 2970 }, { "epoch": 12.0, "step": 3240, "train_eval_accuracy": 0.9814169570267132, "train_eval_f1": 0.981230750145857, "train_eval_loss": 0.07012941688299179, "train_eval_precision": 0.9813761063026012, "train_eval_recall": 0.9812601020344451, "train_loss": 0.0701294094324112, "train_runtime": 135.0899, "train_samples_per_second": 63.735, "train_steps_per_second": 1.999 }, { "epoch": 12.0, "eval_accuracy": 0.9138211382113821, "eval_f1": 0.9153233063905818, "eval_loss": 0.3999248445034027, "eval_precision": 0.9164164725022964, "eval_recall": 0.9158851947670714, "eval_runtime": 58.1781, "eval_samples_per_second": 63.426, "eval_steps_per_second": 1.994, "step": 3240 }, { "epoch": 12.0, "step": 3240, "total_flos": 1.369605074374656e+16, "train_loss": 0.6536671579619985, "train_runtime": 7491.5732, "train_samples_per_second": 22.986, "train_steps_per_second": 0.721 }, { "epoch": 12.0, "eval_accuracy": 0.9138211382113821, "eval_f1": 0.9153233063905818, "eval_loss": 0.3999248445034027, "eval_precision": 0.9164164725022964, "eval_recall": 0.9158851947670714, "eval_runtime": 58.1601, "eval_samples_per_second": 63.446, "eval_steps_per_second": 1.994, "step": 3240 }, { "epoch": 12.0, "step": 3240, "train_en_eval_accuracy": 0.9814169570267132, "train_en_eval_f1": 0.981230750145857, "train_en_eval_loss": 0.07012941688299179, "train_en_eval_precision": 0.9813761063026012, "train_en_eval_recall": 0.9812601020344451, "train_en_loss": 0.0701294094324112, "train_en_runtime": 134.0905, "train_en_samples_per_second": 64.21, "train_en_steps_per_second": 2.014 }, { "epoch": 12.0, "step": 3240, "test_en_eval_accuracy": 0.9138211382113821, "test_en_eval_f1": 0.9153233063905818, "test_en_eval_loss": 0.3999248445034027, "test_en_eval_precision": 0.9164164725022964, "test_en_eval_recall": 0.9158851947670714, "test_en_loss": 0.3999248445034027, "test_en_runtime": 57.7645, "test_en_samples_per_second": 63.88, "test_en_steps_per_second": 2.008 } ], "logging_steps": 500, "max_steps": 5400, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.369605074374656e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }