{ "best_metric": 0.016336046159267426, "best_model_checkpoint": "/temp/t5_base-qg-ap-test/checkpoint-100", "epoch": 100.0, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 12.805366516113281, "eval_runtime": 0.2208, "eval_samples_per_second": 36.238, "eval_steps_per_second": 4.53, "step": 1 }, { "epoch": 2.0, "eval_loss": 10.788012504577637, "eval_runtime": 0.2356, "eval_samples_per_second": 33.954, "eval_steps_per_second": 4.244, "step": 2 }, { "epoch": 3.0, "eval_loss": 8.873129844665527, "eval_runtime": 0.2362, "eval_samples_per_second": 33.872, "eval_steps_per_second": 4.234, "step": 3 }, { "epoch": 4.0, "eval_loss": 7.4068284034729, "eval_runtime": 0.2351, "eval_samples_per_second": 34.03, "eval_steps_per_second": 4.254, "step": 4 }, { "epoch": 5.0, "eval_loss": 6.458061218261719, "eval_runtime": 0.2334, "eval_samples_per_second": 34.275, "eval_steps_per_second": 4.284, "step": 5 }, { "epoch": 6.0, "eval_loss": 5.647502422332764, "eval_runtime": 0.2335, "eval_samples_per_second": 34.259, "eval_steps_per_second": 4.282, "step": 6 }, { "epoch": 7.0, "eval_loss": 4.959558486938477, "eval_runtime": 0.2256, "eval_samples_per_second": 35.464, "eval_steps_per_second": 4.433, "step": 7 }, { "epoch": 8.0, "eval_loss": 4.50578498840332, "eval_runtime": 0.2359, "eval_samples_per_second": 33.916, "eval_steps_per_second": 4.239, "step": 8 }, { "epoch": 9.0, "eval_loss": 4.076832294464111, "eval_runtime": 0.2354, "eval_samples_per_second": 33.984, "eval_steps_per_second": 4.248, "step": 9 }, { "epoch": 10.0, "eval_loss": 3.704714059829712, "eval_runtime": 0.2353, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.25, "step": 10 }, { "epoch": 11.0, "eval_loss": 3.4143009185791016, "eval_runtime": 0.2359, "eval_samples_per_second": 33.914, "eval_steps_per_second": 4.239, "step": 11 }, { "epoch": 12.0, "eval_loss": 3.1360208988189697, "eval_runtime": 0.2359, "eval_samples_per_second": 33.913, "eval_steps_per_second": 4.239, "step": 12 }, { "epoch": 13.0, "eval_loss": 2.8865740299224854, "eval_runtime": 0.2356, "eval_samples_per_second": 33.955, "eval_steps_per_second": 4.244, "step": 13 }, { "epoch": 14.0, "eval_loss": 2.6324548721313477, "eval_runtime": 0.234, "eval_samples_per_second": 34.185, "eval_steps_per_second": 4.273, "step": 14 }, { "epoch": 15.0, "eval_loss": 2.388902187347412, "eval_runtime": 0.2365, "eval_samples_per_second": 33.826, "eval_steps_per_second": 4.228, "step": 15 }, { "epoch": 16.0, "eval_loss": 2.1914408206939697, "eval_runtime": 0.2324, "eval_samples_per_second": 34.427, "eval_steps_per_second": 4.303, "step": 16 }, { "epoch": 17.0, "eval_loss": 2.0423526763916016, "eval_runtime": 0.2363, "eval_samples_per_second": 33.85, "eval_steps_per_second": 4.231, "step": 17 }, { "epoch": 18.0, "eval_loss": 1.9110742807388306, "eval_runtime": 0.2247, "eval_samples_per_second": 35.611, "eval_steps_per_second": 4.451, "step": 18 }, { "epoch": 19.0, "eval_loss": 1.7762634754180908, "eval_runtime": 0.2365, "eval_samples_per_second": 33.825, "eval_steps_per_second": 4.228, "step": 19 }, { "epoch": 20.0, "eval_loss": 1.6505231857299805, "eval_runtime": 0.2253, "eval_samples_per_second": 35.504, "eval_steps_per_second": 4.438, "step": 20 }, { "epoch": 21.0, "eval_loss": 1.525721788406372, "eval_runtime": 0.2362, "eval_samples_per_second": 33.872, "eval_steps_per_second": 4.234, "step": 21 }, { "epoch": 22.0, "eval_loss": 1.4125868082046509, "eval_runtime": 0.235, "eval_samples_per_second": 34.046, "eval_steps_per_second": 4.256, "step": 22 }, { "epoch": 23.0, "eval_loss": 1.3109110593795776, "eval_runtime": 0.2369, "eval_samples_per_second": 33.769, "eval_steps_per_second": 4.221, "step": 23 }, { "epoch": 24.0, "eval_loss": 1.2188917398452759, "eval_runtime": 0.2319, "eval_samples_per_second": 34.5, "eval_steps_per_second": 4.312, "step": 24 }, { "epoch": 25.0, "eval_loss": 1.1338324546813965, "eval_runtime": 0.2362, "eval_samples_per_second": 33.873, "eval_steps_per_second": 4.234, "step": 25 }, { "epoch": 26.0, "eval_loss": 1.0485577583312988, "eval_runtime": 0.2311, "eval_samples_per_second": 34.62, "eval_steps_per_second": 4.328, "step": 26 }, { "epoch": 27.0, "eval_loss": 0.9640414118766785, "eval_runtime": 0.2359, "eval_samples_per_second": 33.914, "eval_steps_per_second": 4.239, "step": 27 }, { "epoch": 28.0, "eval_loss": 0.8827559947967529, "eval_runtime": 0.2268, "eval_samples_per_second": 35.271, "eval_steps_per_second": 4.409, "step": 28 }, { "epoch": 29.0, "eval_loss": 0.8060356378555298, "eval_runtime": 0.2364, "eval_samples_per_second": 33.848, "eval_steps_per_second": 4.231, "step": 29 }, { "epoch": 30.0, "eval_loss": 0.7329221367835999, "eval_runtime": 0.2336, "eval_samples_per_second": 34.244, "eval_steps_per_second": 4.281, "step": 30 }, { "epoch": 31.0, "eval_loss": 0.6638815402984619, "eval_runtime": 0.2359, "eval_samples_per_second": 33.912, "eval_steps_per_second": 4.239, "step": 31 }, { "epoch": 32.0, "eval_loss": 0.6010197997093201, "eval_runtime": 0.235, "eval_samples_per_second": 34.042, "eval_steps_per_second": 4.255, "step": 32 }, { "epoch": 33.0, "eval_loss": 0.5438850522041321, "eval_runtime": 0.2331, "eval_samples_per_second": 34.324, "eval_steps_per_second": 4.29, "step": 33 }, { "epoch": 34.0, "eval_loss": 0.4924549162387848, "eval_runtime": 0.2353, "eval_samples_per_second": 34.004, "eval_steps_per_second": 4.25, "step": 34 }, { "epoch": 35.0, "eval_loss": 0.4471151828765869, "eval_runtime": 0.2413, "eval_samples_per_second": 33.158, "eval_steps_per_second": 4.145, "step": 35 }, { "epoch": 36.0, "eval_loss": 0.4065961539745331, "eval_runtime": 0.2355, "eval_samples_per_second": 33.966, "eval_steps_per_second": 4.246, "step": 36 }, { "epoch": 37.0, "eval_loss": 0.3690074682235718, "eval_runtime": 0.2363, "eval_samples_per_second": 33.853, "eval_steps_per_second": 4.232, "step": 37 }, { "epoch": 38.0, "eval_loss": 0.3340989649295807, "eval_runtime": 0.2363, "eval_samples_per_second": 33.86, "eval_steps_per_second": 4.232, "step": 38 }, { "epoch": 39.0, "eval_loss": 0.3023061454296112, "eval_runtime": 0.2338, "eval_samples_per_second": 34.218, "eval_steps_per_second": 4.277, "step": 39 }, { "epoch": 40.0, "eval_loss": 0.27456292510032654, "eval_runtime": 0.2359, "eval_samples_per_second": 33.907, "eval_steps_per_second": 4.238, "step": 40 }, { "epoch": 41.0, "eval_loss": 0.24695347249507904, "eval_runtime": 0.2296, "eval_samples_per_second": 34.843, "eval_steps_per_second": 4.355, "step": 41 }, { "epoch": 42.0, "eval_loss": 0.220541313290596, "eval_runtime": 0.2354, "eval_samples_per_second": 33.99, "eval_steps_per_second": 4.249, "step": 42 }, { "epoch": 43.0, "eval_loss": 0.19677509367465973, "eval_runtime": 0.2327, "eval_samples_per_second": 34.379, "eval_steps_per_second": 4.297, "step": 43 }, { "epoch": 44.0, "eval_loss": 0.17713746428489685, "eval_runtime": 0.2348, "eval_samples_per_second": 34.066, "eval_steps_per_second": 4.258, "step": 44 }, { "epoch": 45.0, "eval_loss": 0.15933585166931152, "eval_runtime": 0.2234, "eval_samples_per_second": 35.814, "eval_steps_per_second": 4.477, "step": 45 }, { "epoch": 46.0, "eval_loss": 0.14242056012153625, "eval_runtime": 0.2366, "eval_samples_per_second": 33.807, "eval_steps_per_second": 4.226, "step": 46 }, { "epoch": 47.0, "eval_loss": 0.1287701427936554, "eval_runtime": 0.2313, "eval_samples_per_second": 34.594, "eval_steps_per_second": 4.324, "step": 47 }, { "epoch": 48.0, "eval_loss": 0.11695855855941772, "eval_runtime": 0.2269, "eval_samples_per_second": 35.251, "eval_steps_per_second": 4.406, "step": 48 }, { "epoch": 49.0, "eval_loss": 0.1070137694478035, "eval_runtime": 0.2278, "eval_samples_per_second": 35.111, "eval_steps_per_second": 4.389, "step": 49 }, { "epoch": 50.0, "eval_loss": 0.09962165355682373, "eval_runtime": 0.2347, "eval_samples_per_second": 34.085, "eval_steps_per_second": 4.261, "step": 50 }, { "epoch": 51.0, "eval_loss": 0.09394610673189163, "eval_runtime": 0.2348, "eval_samples_per_second": 34.077, "eval_steps_per_second": 4.26, "step": 51 }, { "epoch": 52.0, "eval_loss": 0.08877500891685486, "eval_runtime": 0.2326, "eval_samples_per_second": 34.394, "eval_steps_per_second": 4.299, "step": 52 }, { "epoch": 53.0, "eval_loss": 0.08450286090373993, "eval_runtime": 0.2348, "eval_samples_per_second": 34.07, "eval_steps_per_second": 4.259, "step": 53 }, { "epoch": 54.0, "eval_loss": 0.0817728266119957, "eval_runtime": 0.2313, "eval_samples_per_second": 34.588, "eval_steps_per_second": 4.324, "step": 54 }, { "epoch": 55.0, "eval_loss": 0.07895343005657196, "eval_runtime": 0.2359, "eval_samples_per_second": 33.915, "eval_steps_per_second": 4.239, "step": 55 }, { "epoch": 56.0, "eval_loss": 0.07630708068609238, "eval_runtime": 0.2283, "eval_samples_per_second": 35.038, "eval_steps_per_second": 4.38, "step": 56 }, { "epoch": 57.0, "eval_loss": 0.0731731578707695, "eval_runtime": 0.2364, "eval_samples_per_second": 33.835, "eval_steps_per_second": 4.229, "step": 57 }, { "epoch": 58.0, "eval_loss": 0.06972303986549377, "eval_runtime": 0.2275, "eval_samples_per_second": 35.171, "eval_steps_per_second": 4.396, "step": 58 }, { "epoch": 59.0, "eval_loss": 0.06655264645814896, "eval_runtime": 0.2357, "eval_samples_per_second": 33.941, "eval_steps_per_second": 4.243, "step": 59 }, { "epoch": 60.0, "eval_loss": 0.06421676278114319, "eval_runtime": 0.2353, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.25, "step": 60 }, { "epoch": 61.0, "eval_loss": 0.06110429763793945, "eval_runtime": 0.2361, "eval_samples_per_second": 33.886, "eval_steps_per_second": 4.236, "step": 61 }, { "epoch": 62.0, "eval_loss": 0.05834279954433441, "eval_runtime": 0.2379, "eval_samples_per_second": 33.624, "eval_steps_per_second": 4.203, "step": 62 }, { "epoch": 63.0, "eval_loss": 0.055961962789297104, "eval_runtime": 0.2264, "eval_samples_per_second": 35.335, "eval_steps_per_second": 4.417, "step": 63 }, { "epoch": 64.0, "eval_loss": 0.05323232710361481, "eval_runtime": 0.2359, "eval_samples_per_second": 33.916, "eval_steps_per_second": 4.24, "step": 64 }, { "epoch": 65.0, "eval_loss": 0.051185671240091324, "eval_runtime": 0.2338, "eval_samples_per_second": 34.213, "eval_steps_per_second": 4.277, "step": 65 }, { "epoch": 66.0, "eval_loss": 0.04865783825516701, "eval_runtime": 0.2358, "eval_samples_per_second": 33.928, "eval_steps_per_second": 4.241, "step": 66 }, { "epoch": 67.0, "eval_loss": 0.04639100283384323, "eval_runtime": 0.2281, "eval_samples_per_second": 35.066, "eval_steps_per_second": 4.383, "step": 67 }, { "epoch": 68.0, "eval_loss": 0.04309353977441788, "eval_runtime": 0.2365, "eval_samples_per_second": 33.827, "eval_steps_per_second": 4.228, "step": 68 }, { "epoch": 69.0, "eval_loss": 0.03992551565170288, "eval_runtime": 0.2324, "eval_samples_per_second": 34.422, "eval_steps_per_second": 4.303, "step": 69 }, { "epoch": 70.0, "eval_loss": 0.03812782093882561, "eval_runtime": 0.2236, "eval_samples_per_second": 35.783, "eval_steps_per_second": 4.473, "step": 70 }, { "epoch": 71.0, "eval_loss": 0.03636465594172478, "eval_runtime": 0.2325, "eval_samples_per_second": 34.401, "eval_steps_per_second": 4.3, "step": 71 }, { "epoch": 72.0, "eval_loss": 0.034834641963243484, "eval_runtime": 0.2358, "eval_samples_per_second": 33.926, "eval_steps_per_second": 4.241, "step": 72 }, { "epoch": 73.0, "eval_loss": 0.03329307958483696, "eval_runtime": 0.2345, "eval_samples_per_second": 34.111, "eval_steps_per_second": 4.264, "step": 73 }, { "epoch": 74.0, "eval_loss": 0.031552691012620926, "eval_runtime": 0.2364, "eval_samples_per_second": 33.845, "eval_steps_per_second": 4.231, "step": 74 }, { "epoch": 75.0, "eval_loss": 0.029882650822401047, "eval_runtime": 0.2329, "eval_samples_per_second": 34.351, "eval_steps_per_second": 4.294, "step": 75 }, { "epoch": 76.0, "eval_loss": 0.028516214340925217, "eval_runtime": 0.2359, "eval_samples_per_second": 33.907, "eval_steps_per_second": 4.238, "step": 76 }, { "epoch": 77.0, "eval_loss": 0.027370158582925797, "eval_runtime": 0.2246, "eval_samples_per_second": 35.624, "eval_steps_per_second": 4.453, "step": 77 }, { "epoch": 78.0, "eval_loss": 0.026426443830132484, "eval_runtime": 0.2254, "eval_samples_per_second": 35.492, "eval_steps_per_second": 4.436, "step": 78 }, { "epoch": 79.0, "eval_loss": 0.02534804865717888, "eval_runtime": 0.2295, "eval_samples_per_second": 34.856, "eval_steps_per_second": 4.357, "step": 79 }, { "epoch": 80.0, "eval_loss": 0.024182336404919624, "eval_runtime": 0.2363, "eval_samples_per_second": 33.856, "eval_steps_per_second": 4.232, "step": 80 }, { "epoch": 81.0, "eval_loss": 0.023593546822667122, "eval_runtime": 0.2361, "eval_samples_per_second": 33.881, "eval_steps_per_second": 4.235, "step": 81 }, { "epoch": 82.0, "eval_loss": 0.023052040487527847, "eval_runtime": 0.2361, "eval_samples_per_second": 33.879, "eval_steps_per_second": 4.235, "step": 82 }, { "epoch": 83.0, "eval_loss": 0.02290300466120243, "eval_runtime": 0.2361, "eval_samples_per_second": 33.884, "eval_steps_per_second": 4.236, "step": 83 }, { "epoch": 84.0, "eval_loss": 0.02261677198112011, "eval_runtime": 0.2286, "eval_samples_per_second": 35.002, "eval_steps_per_second": 4.375, "step": 84 }, { "epoch": 85.0, "eval_loss": 0.022289568558335304, "eval_runtime": 0.2353, "eval_samples_per_second": 34.003, "eval_steps_per_second": 4.25, "step": 85 }, { "epoch": 86.0, "eval_loss": 0.02184910513460636, "eval_runtime": 0.2287, "eval_samples_per_second": 34.986, "eval_steps_per_second": 4.373, "step": 86 }, { "epoch": 87.0, "eval_loss": 0.021228935569524765, "eval_runtime": 0.2349, "eval_samples_per_second": 34.051, "eval_steps_per_second": 4.256, "step": 87 }, { "epoch": 88.0, "eval_loss": 0.020517783239483833, "eval_runtime": 0.2353, "eval_samples_per_second": 34.002, "eval_steps_per_second": 4.25, "step": 88 }, { "epoch": 89.0, "eval_loss": 0.019832810387015343, "eval_runtime": 0.2232, "eval_samples_per_second": 35.839, "eval_steps_per_second": 4.48, "step": 89 }, { "epoch": 90.0, "eval_loss": 0.0191506277769804, "eval_runtime": 0.2312, "eval_samples_per_second": 34.605, "eval_steps_per_second": 4.326, "step": 90 }, { "epoch": 91.0, "eval_loss": 0.018617864698171616, "eval_runtime": 0.2243, "eval_samples_per_second": 35.667, "eval_steps_per_second": 4.458, "step": 91 }, { "epoch": 92.0, "eval_loss": 0.01811818592250347, "eval_runtime": 0.2348, "eval_samples_per_second": 34.07, "eval_steps_per_second": 4.259, "step": 92 }, { "epoch": 93.0, "eval_loss": 0.01765601523220539, "eval_runtime": 0.2363, "eval_samples_per_second": 33.853, "eval_steps_per_second": 4.232, "step": 93 }, { "epoch": 94.0, "eval_loss": 0.017278417944908142, "eval_runtime": 0.2264, "eval_samples_per_second": 35.331, "eval_steps_per_second": 4.416, "step": 94 }, { "epoch": 95.0, "eval_loss": 0.016984442248940468, "eval_runtime": 0.2349, "eval_samples_per_second": 34.053, "eval_steps_per_second": 4.257, "step": 95 }, { "epoch": 96.0, "eval_loss": 0.01675889454782009, "eval_runtime": 0.2343, "eval_samples_per_second": 34.141, "eval_steps_per_second": 4.268, "step": 96 }, { "epoch": 97.0, "eval_loss": 0.016593070700764656, "eval_runtime": 0.2359, "eval_samples_per_second": 33.915, "eval_steps_per_second": 4.239, "step": 97 }, { "epoch": 98.0, "eval_loss": 0.016466278582811356, "eval_runtime": 0.2296, "eval_samples_per_second": 34.851, "eval_steps_per_second": 4.356, "step": 98 }, { "epoch": 99.0, "eval_loss": 0.016385838389396667, "eval_runtime": 0.2364, "eval_samples_per_second": 33.84, "eval_steps_per_second": 4.23, "step": 99 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 1.4009, "step": 100 }, { "epoch": 100.0, "eval_loss": 0.016336046159267426, "eval_runtime": 0.2203, "eval_samples_per_second": 36.315, "eval_steps_per_second": 4.539, "step": 100 }, { "epoch": 100.0, "step": 100, "total_flos": 487166312448000.0, "train_loss": 1.40092041015625, "train_runtime": 1064.353, "train_samples_per_second": 0.752, "train_steps_per_second": 0.094 } ], "logging_steps": 100, "max_steps": 100, "num_train_epochs": 100, "save_steps": 500, "total_flos": 487166312448000.0, "trial_name": null, "trial_params": null }