{ "best_metric": 0.4186987578868866, "best_model_checkpoint": "/tmp/model/checkpoint-114", "epoch": 3.0, "eval_steps": 500, "global_step": 114, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.166666666666667e-06, "loss": 1.0986, "step": 1 }, { "epoch": 0.05, "learning_rate": 4.166666666666667e-06, "loss": 1.0986, "step": 2 }, { "epoch": 0.08, "learning_rate": 8.333333333333334e-06, "loss": 1.0986, "step": 3 }, { "epoch": 0.11, "learning_rate": 1.25e-05, "loss": 1.098, "step": 4 }, { "epoch": 0.13, "learning_rate": 1.6666666666666667e-05, "loss": 1.0968, "step": 5 }, { "epoch": 0.16, "learning_rate": 2.0833333333333336e-05, "loss": 1.0957, "step": 6 }, { "epoch": 0.18, "learning_rate": 2.0833333333333336e-05, "loss": 1.0952, "step": 7 }, { "epoch": 0.21, "learning_rate": 2.5e-05, "loss": 1.0934, "step": 8 }, { "epoch": 0.24, "learning_rate": 2.916666666666667e-05, "loss": 1.0903, "step": 9 }, { "epoch": 0.26, "learning_rate": 3.3333333333333335e-05, "loss": 1.0896, "step": 10 }, { "epoch": 0.29, "learning_rate": 3.7500000000000003e-05, "loss": 1.0911, "step": 11 }, { "epoch": 0.32, "learning_rate": 4.166666666666667e-05, "loss": 1.0736, "step": 12 }, { "epoch": 0.34, "learning_rate": 4.5833333333333334e-05, "loss": 1.1018, "step": 13 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 1.075, "step": 14 }, { "epoch": 0.39, "learning_rate": 4.9509803921568634e-05, "loss": 1.0414, "step": 15 }, { "epoch": 0.42, "learning_rate": 4.901960784313725e-05, "loss": 1.0614, "step": 16 }, { "epoch": 0.45, "learning_rate": 4.8529411764705885e-05, "loss": 1.0775, "step": 17 }, { "epoch": 0.47, "learning_rate": 4.803921568627452e-05, "loss": 1.0337, "step": 18 }, { "epoch": 0.5, "learning_rate": 4.7549019607843135e-05, "loss": 0.9952, "step": 19 }, { "epoch": 0.53, "learning_rate": 4.705882352941177e-05, "loss": 1.0467, "step": 20 }, { "epoch": 0.55, "learning_rate": 4.656862745098039e-05, "loss": 1.0225, "step": 21 }, { "epoch": 0.58, "learning_rate": 4.607843137254902e-05, "loss": 1.0948, "step": 22 }, { "epoch": 0.61, "learning_rate": 4.558823529411765e-05, "loss": 1.0724, "step": 23 }, { "epoch": 0.63, "learning_rate": 4.558823529411765e-05, "loss": 1.0368, "step": 24 }, { "epoch": 0.66, "learning_rate": 4.5098039215686275e-05, "loss": 1.046, "step": 25 }, { "epoch": 0.68, "learning_rate": 4.460784313725491e-05, "loss": 1.0462, "step": 26 }, { "epoch": 0.71, "learning_rate": 4.411764705882353e-05, "loss": 1.0037, "step": 27 }, { "epoch": 0.74, "learning_rate": 4.362745098039216e-05, "loss": 1.0037, "step": 28 }, { "epoch": 0.76, "learning_rate": 4.313725490196079e-05, "loss": 1.0308, "step": 29 }, { "epoch": 0.79, "learning_rate": 4.2647058823529415e-05, "loss": 1.0509, "step": 30 }, { "epoch": 0.82, "learning_rate": 4.215686274509804e-05, "loss": 1.0226, "step": 31 }, { "epoch": 0.84, "learning_rate": 4.166666666666667e-05, "loss": 0.9627, "step": 32 }, { "epoch": 0.87, "learning_rate": 4.11764705882353e-05, "loss": 0.9468, "step": 33 }, { "epoch": 0.89, "learning_rate": 4.068627450980392e-05, "loss": 0.9843, "step": 34 }, { "epoch": 0.92, "learning_rate": 4.0196078431372555e-05, "loss": 0.7904, "step": 35 }, { "epoch": 0.95, "learning_rate": 3.970588235294117e-05, "loss": 0.9915, "step": 36 }, { "epoch": 0.97, "learning_rate": 3.9215686274509805e-05, "loss": 0.9587, "step": 37 }, { "epoch": 1.0, "learning_rate": 3.872549019607844e-05, "loss": 1.003, "step": 38 }, { "epoch": 1.0, "eval_accuracy": 0.7105263157894737, "eval_f1_macro": 0.5220918441257424, "eval_f1_micro": 0.7105263157894737, "eval_f1_weighted": 0.6349642032068438, "eval_loss": 0.8949874043464661, "eval_precision_macro": 0.4696864111498258, "eval_precision_micro": 0.7105263157894737, "eval_precision_weighted": 0.5784705666605539, "eval_recall_macro": 0.5938438438438438, "eval_recall_micro": 0.7105263157894737, "eval_recall_weighted": 0.7105263157894737, "eval_runtime": 2.0351, "eval_samples_per_second": 37.344, "eval_steps_per_second": 2.457, "step": 38 }, { "epoch": 1.03, "learning_rate": 3.8235294117647055e-05, "loss": 1.0132, "step": 39 }, { "epoch": 1.05, "learning_rate": 3.8235294117647055e-05, "loss": 0.8854, "step": 40 }, { "epoch": 1.08, "learning_rate": 3.774509803921569e-05, "loss": 0.9694, "step": 41 }, { "epoch": 1.11, "learning_rate": 3.725490196078432e-05, "loss": 0.8182, "step": 42 }, { "epoch": 1.13, "learning_rate": 3.6764705882352945e-05, "loss": 0.9112, "step": 43 }, { "epoch": 1.16, "learning_rate": 3.627450980392157e-05, "loss": 0.8819, "step": 44 }, { "epoch": 1.18, "learning_rate": 3.5784313725490195e-05, "loss": 1.0679, "step": 45 }, { "epoch": 1.21, "learning_rate": 3.529411764705883e-05, "loss": 0.8977, "step": 46 }, { "epoch": 1.24, "learning_rate": 3.480392156862745e-05, "loss": 0.7788, "step": 47 }, { "epoch": 1.26, "learning_rate": 3.431372549019608e-05, "loss": 0.9201, "step": 48 }, { "epoch": 1.29, "learning_rate": 3.382352941176471e-05, "loss": 0.781, "step": 49 }, { "epoch": 1.32, "learning_rate": 3.3333333333333335e-05, "loss": 0.8634, "step": 50 }, { "epoch": 1.34, "learning_rate": 3.284313725490196e-05, "loss": 0.8995, "step": 51 }, { "epoch": 1.37, "learning_rate": 3.235294117647059e-05, "loss": 0.7176, "step": 52 }, { "epoch": 1.39, "learning_rate": 3.186274509803922e-05, "loss": 1.0019, "step": 53 }, { "epoch": 1.42, "learning_rate": 3.137254901960784e-05, "loss": 0.6014, "step": 54 }, { "epoch": 1.45, "learning_rate": 3.0882352941176475e-05, "loss": 0.6722, "step": 55 }, { "epoch": 1.47, "learning_rate": 3.0392156862745097e-05, "loss": 0.8279, "step": 56 }, { "epoch": 1.5, "learning_rate": 2.9901960784313725e-05, "loss": 0.7848, "step": 57 }, { "epoch": 1.53, "learning_rate": 2.9411764705882354e-05, "loss": 0.8737, "step": 58 }, { "epoch": 1.55, "learning_rate": 2.8921568627450986e-05, "loss": 0.679, "step": 59 }, { "epoch": 1.58, "learning_rate": 2.8431372549019608e-05, "loss": 0.5924, "step": 60 }, { "epoch": 1.61, "learning_rate": 2.7941176470588236e-05, "loss": 0.6882, "step": 61 }, { "epoch": 1.63, "learning_rate": 2.7450980392156865e-05, "loss": 0.5811, "step": 62 }, { "epoch": 1.66, "learning_rate": 2.696078431372549e-05, "loss": 0.6237, "step": 63 }, { "epoch": 1.68, "learning_rate": 2.647058823529412e-05, "loss": 0.7408, "step": 64 }, { "epoch": 1.71, "learning_rate": 2.5980392156862747e-05, "loss": 0.5923, "step": 65 }, { "epoch": 1.74, "learning_rate": 2.5490196078431373e-05, "loss": 0.53, "step": 66 }, { "epoch": 1.76, "learning_rate": 2.5e-05, "loss": 0.8175, "step": 67 }, { "epoch": 1.79, "learning_rate": 2.4509803921568626e-05, "loss": 0.816, "step": 68 }, { "epoch": 1.82, "learning_rate": 2.401960784313726e-05, "loss": 0.6064, "step": 69 }, { "epoch": 1.84, "learning_rate": 2.3529411764705884e-05, "loss": 0.7559, "step": 70 }, { "epoch": 1.87, "learning_rate": 2.303921568627451e-05, "loss": 0.6855, "step": 71 }, { "epoch": 1.89, "learning_rate": 2.2549019607843138e-05, "loss": 0.4998, "step": 72 }, { "epoch": 1.92, "learning_rate": 2.2058823529411766e-05, "loss": 0.3965, "step": 73 }, { "epoch": 1.95, "learning_rate": 2.1568627450980395e-05, "loss": 0.2904, "step": 74 }, { "epoch": 1.97, "learning_rate": 2.107843137254902e-05, "loss": 0.5538, "step": 75 }, { "epoch": 2.0, "learning_rate": 2.058823529411765e-05, "loss": 0.6493, "step": 76 }, { "epoch": 2.0, "eval_accuracy": 0.8552631578947368, "eval_f1_macro": 0.766620230632304, "eval_f1_micro": 0.8552631578947367, "eval_f1_weighted": 0.8357707159703299, "eval_loss": 0.508880615234375, "eval_precision_macro": 0.8952380952380953, "eval_precision_micro": 0.8552631578947368, "eval_precision_weighted": 0.9007518796992481, "eval_recall_macro": 0.7687687687687688, "eval_recall_micro": 0.8552631578947368, "eval_recall_weighted": 0.8552631578947368, "eval_runtime": 2.0308, "eval_samples_per_second": 37.425, "eval_steps_per_second": 2.462, "step": 76 }, { "epoch": 2.03, "learning_rate": 2.0098039215686277e-05, "loss": 0.6863, "step": 77 }, { "epoch": 2.05, "learning_rate": 1.9607843137254903e-05, "loss": 0.6366, "step": 78 }, { "epoch": 2.08, "learning_rate": 1.9117647058823528e-05, "loss": 0.6654, "step": 79 }, { "epoch": 2.11, "learning_rate": 1.862745098039216e-05, "loss": 0.6877, "step": 80 }, { "epoch": 2.13, "learning_rate": 1.8137254901960785e-05, "loss": 0.4684, "step": 81 }, { "epoch": 2.16, "learning_rate": 1.7647058823529414e-05, "loss": 0.8363, "step": 82 }, { "epoch": 2.18, "learning_rate": 1.715686274509804e-05, "loss": 0.49, "step": 83 }, { "epoch": 2.21, "learning_rate": 1.6666666666666667e-05, "loss": 0.3663, "step": 84 }, { "epoch": 2.24, "learning_rate": 1.6176470588235296e-05, "loss": 0.4103, "step": 85 }, { "epoch": 2.26, "learning_rate": 1.568627450980392e-05, "loss": 0.6288, "step": 86 }, { "epoch": 2.29, "learning_rate": 1.5196078431372548e-05, "loss": 0.5966, "step": 87 }, { "epoch": 2.32, "learning_rate": 1.4705882352941177e-05, "loss": 0.5132, "step": 88 }, { "epoch": 2.34, "learning_rate": 1.4215686274509804e-05, "loss": 0.3739, "step": 89 }, { "epoch": 2.37, "learning_rate": 1.3725490196078432e-05, "loss": 0.6379, "step": 90 }, { "epoch": 2.39, "learning_rate": 1.323529411764706e-05, "loss": 0.7557, "step": 91 }, { "epoch": 2.42, "learning_rate": 1.2745098039215686e-05, "loss": 0.3617, "step": 92 }, { "epoch": 2.45, "learning_rate": 1.2254901960784313e-05, "loss": 0.2977, "step": 93 }, { "epoch": 2.47, "learning_rate": 1.1764705882352942e-05, "loss": 0.9881, "step": 94 }, { "epoch": 2.5, "learning_rate": 1.1274509803921569e-05, "loss": 0.7618, "step": 95 }, { "epoch": 2.53, "learning_rate": 1.0784313725490197e-05, "loss": 0.3369, "step": 96 }, { "epoch": 2.55, "learning_rate": 1.0294117647058824e-05, "loss": 0.4169, "step": 97 }, { "epoch": 2.58, "learning_rate": 9.803921568627451e-06, "loss": 0.6706, "step": 98 }, { "epoch": 2.61, "learning_rate": 9.31372549019608e-06, "loss": 0.634, "step": 99 }, { "epoch": 2.63, "learning_rate": 8.823529411764707e-06, "loss": 0.2243, "step": 100 }, { "epoch": 2.66, "learning_rate": 8.333333333333334e-06, "loss": 0.8181, "step": 101 }, { "epoch": 2.68, "learning_rate": 7.84313725490196e-06, "loss": 0.5856, "step": 102 }, { "epoch": 2.71, "learning_rate": 7.3529411764705884e-06, "loss": 0.4225, "step": 103 }, { "epoch": 2.74, "learning_rate": 6.862745098039216e-06, "loss": 0.3595, "step": 104 }, { "epoch": 2.76, "learning_rate": 6.372549019607843e-06, "loss": 0.5012, "step": 105 }, { "epoch": 2.79, "learning_rate": 5.882352941176471e-06, "loss": 0.7054, "step": 106 }, { "epoch": 2.82, "learning_rate": 5.392156862745099e-06, "loss": 0.539, "step": 107 }, { "epoch": 2.84, "learning_rate": 4.901960784313726e-06, "loss": 0.4866, "step": 108 }, { "epoch": 2.87, "learning_rate": 4.411764705882353e-06, "loss": 0.4528, "step": 109 }, { "epoch": 2.89, "learning_rate": 3.92156862745098e-06, "loss": 0.4653, "step": 110 }, { "epoch": 2.92, "learning_rate": 3.431372549019608e-06, "loss": 0.5724, "step": 111 }, { "epoch": 2.95, "learning_rate": 2.9411764705882355e-06, "loss": 0.5088, "step": 112 }, { "epoch": 2.97, "learning_rate": 2.450980392156863e-06, "loss": 0.7226, "step": 113 }, { "epoch": 3.0, "learning_rate": 1.96078431372549e-06, "loss": 0.2725, "step": 114 }, { "epoch": 3.0, "eval_accuracy": 0.881578947368421, "eval_f1_macro": 0.8215900897948484, "eval_f1_micro": 0.881578947368421, "eval_f1_weighted": 0.8716990992917834, "eval_loss": 0.4186987578868866, "eval_precision_macro": 0.9090909090909092, "eval_precision_micro": 0.881578947368421, "eval_precision_weighted": 0.9138755980861244, "eval_recall_macro": 0.8132132132132132, "eval_recall_micro": 0.881578947368421, "eval_recall_weighted": 0.881578947368421, "eval_runtime": 2.034, "eval_samples_per_second": 37.365, "eval_steps_per_second": 2.458, "step": 114 } ], "logging_steps": 1, "max_steps": 114, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 6.994598554602086e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }