|
{ |
|
"best_metric": 2.9971697330474854, |
|
"best_model_checkpoint": "mobilebert_add_pre-training-complete/checkpoint-293068", |
|
"epoch": 167.87912702853944, |
|
"global_step": 300000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.971873957985996e-05, |
|
"loss": 4.8119, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.31985974754558205, |
|
"eval_loss": 4.369968414306641, |
|
"eval_runtime": 3.3492, |
|
"eval_samples_per_second": 143.018, |
|
"eval_steps_per_second": 1.194, |
|
"step": 1787 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.942080693564522e-05, |
|
"loss": 4.2649, |
|
"step": 3574 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3444790701591847, |
|
"eval_loss": 4.09299373626709, |
|
"eval_runtime": 2.6635, |
|
"eval_samples_per_second": 179.842, |
|
"eval_steps_per_second": 1.502, |
|
"step": 3574 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.912287429143048e-05, |
|
"loss": 4.0457, |
|
"step": 5361 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3544818746691187, |
|
"eval_loss": 3.937490224838257, |
|
"eval_runtime": 2.6837, |
|
"eval_samples_per_second": 178.487, |
|
"eval_steps_per_second": 1.491, |
|
"step": 5361 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.882494164721574e-05, |
|
"loss": 3.9099, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3643615542831731, |
|
"eval_loss": 3.853400468826294, |
|
"eval_runtime": 2.6447, |
|
"eval_samples_per_second": 181.115, |
|
"eval_steps_per_second": 1.512, |
|
"step": 7148 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.8527009003001e-05, |
|
"loss": 3.8193, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.3669499105545617, |
|
"eval_loss": 3.7993216514587402, |
|
"eval_runtime": 2.8341, |
|
"eval_samples_per_second": 169.013, |
|
"eval_steps_per_second": 1.411, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.822907635878626e-05, |
|
"loss": 3.7517, |
|
"step": 10722 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.3729702583561185, |
|
"eval_loss": 3.7414000034332275, |
|
"eval_runtime": 2.6482, |
|
"eval_samples_per_second": 180.878, |
|
"eval_steps_per_second": 1.51, |
|
"step": 10722 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.7931143714571526e-05, |
|
"loss": 3.6983, |
|
"step": 12509 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.38179712437501767, |
|
"eval_loss": 3.673661708831787, |
|
"eval_runtime": 2.6433, |
|
"eval_samples_per_second": 181.216, |
|
"eval_steps_per_second": 1.513, |
|
"step": 12509 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.7633211070356786e-05, |
|
"loss": 3.6565, |
|
"step": 14296 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.3794307568661142, |
|
"eval_loss": 3.6657235622406006, |
|
"eval_runtime": 2.6602, |
|
"eval_samples_per_second": 180.063, |
|
"eval_steps_per_second": 1.504, |
|
"step": 14296 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.7335278426142046e-05, |
|
"loss": 3.619, |
|
"step": 16083 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.38690059135113924, |
|
"eval_loss": 3.6128811836242676, |
|
"eval_runtime": 2.9694, |
|
"eval_samples_per_second": 161.314, |
|
"eval_steps_per_second": 1.347, |
|
"step": 16083 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.703734578192731e-05, |
|
"loss": 3.5899, |
|
"step": 17870 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.39099787685774945, |
|
"eval_loss": 3.580448627471924, |
|
"eval_runtime": 2.6649, |
|
"eval_samples_per_second": 179.741, |
|
"eval_steps_per_second": 1.501, |
|
"step": 17870 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.673941313771257e-05, |
|
"loss": 3.5597, |
|
"step": 19657 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.3964062763624093, |
|
"eval_loss": 3.5432231426239014, |
|
"eval_runtime": 2.6576, |
|
"eval_samples_per_second": 180.239, |
|
"eval_steps_per_second": 1.505, |
|
"step": 19657 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.644148049349783e-05, |
|
"loss": 3.5329, |
|
"step": 21444 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.39582186502256966, |
|
"eval_loss": 3.539653778076172, |
|
"eval_runtime": 2.6515, |
|
"eval_samples_per_second": 180.65, |
|
"eval_steps_per_second": 1.509, |
|
"step": 21444 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 4.61435478492831e-05, |
|
"loss": 3.5088, |
|
"step": 23231 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.4010702757800555, |
|
"eval_loss": 3.48960542678833, |
|
"eval_runtime": 2.671, |
|
"eval_samples_per_second": 179.334, |
|
"eval_steps_per_second": 1.498, |
|
"step": 23231 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.584561520506836e-05, |
|
"loss": 3.4904, |
|
"step": 25018 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.39997756086615055, |
|
"eval_loss": 3.473088502883911, |
|
"eval_runtime": 2.9516, |
|
"eval_samples_per_second": 162.282, |
|
"eval_steps_per_second": 1.355, |
|
"step": 25018 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 4.5547682560853625e-05, |
|
"loss": 3.4703, |
|
"step": 26805 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.39938999029530015, |
|
"eval_loss": 3.4971117973327637, |
|
"eval_runtime": 2.873, |
|
"eval_samples_per_second": 166.723, |
|
"eval_steps_per_second": 1.392, |
|
"step": 26805 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.5249749916638885e-05, |
|
"loss": 3.4533, |
|
"step": 28592 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4049408937204818, |
|
"eval_loss": 3.460916519165039, |
|
"eval_runtime": 3.1252, |
|
"eval_samples_per_second": 153.27, |
|
"eval_steps_per_second": 1.28, |
|
"step": 28592 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 4.4951817272424145e-05, |
|
"loss": 3.4369, |
|
"step": 30379 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.4066726276964346, |
|
"eval_loss": 3.441118001937866, |
|
"eval_runtime": 2.8852, |
|
"eval_samples_per_second": 166.017, |
|
"eval_steps_per_second": 1.386, |
|
"step": 30379 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 4.4653884628209404e-05, |
|
"loss": 3.423, |
|
"step": 32166 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.40659769666211204, |
|
"eval_loss": 3.4219350814819336, |
|
"eval_runtime": 2.9284, |
|
"eval_samples_per_second": 163.569, |
|
"eval_steps_per_second": 1.366, |
|
"step": 32166 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 4.4355951983994664e-05, |
|
"loss": 3.4084, |
|
"step": 33953 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.40138222849083216, |
|
"eval_loss": 3.4476888179779053, |
|
"eval_runtime": 2.8209, |
|
"eval_samples_per_second": 169.805, |
|
"eval_steps_per_second": 1.418, |
|
"step": 33953 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.4058019339779924e-05, |
|
"loss": 3.3949, |
|
"step": 35740 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.40872225184519967, |
|
"eval_loss": 3.4012813568115234, |
|
"eval_runtime": 2.7502, |
|
"eval_samples_per_second": 174.171, |
|
"eval_steps_per_second": 1.454, |
|
"step": 35740 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 4.376008669556519e-05, |
|
"loss": 3.3811, |
|
"step": 37527 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.4130312412046158, |
|
"eval_loss": 3.3642032146453857, |
|
"eval_runtime": 2.8758, |
|
"eval_samples_per_second": 166.565, |
|
"eval_steps_per_second": 1.391, |
|
"step": 37527 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 4.346215405135045e-05, |
|
"loss": 3.3688, |
|
"step": 39314 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.403096026951026, |
|
"eval_loss": 3.417304277420044, |
|
"eval_runtime": 2.7385, |
|
"eval_samples_per_second": 174.915, |
|
"eval_steps_per_second": 1.461, |
|
"step": 39314 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 4.316422140713571e-05, |
|
"loss": 3.3598, |
|
"step": 41101 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.4101078204663862, |
|
"eval_loss": 3.401829481124878, |
|
"eval_runtime": 2.6232, |
|
"eval_samples_per_second": 182.602, |
|
"eval_steps_per_second": 1.525, |
|
"step": 41101 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.286628876292098e-05, |
|
"loss": 3.3484, |
|
"step": 42888 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.4142865190693482, |
|
"eval_loss": 3.3499021530151367, |
|
"eval_runtime": 2.8522, |
|
"eval_samples_per_second": 167.94, |
|
"eval_steps_per_second": 1.402, |
|
"step": 42888 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.2568356118706237e-05, |
|
"loss": 3.3363, |
|
"step": 44675 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.4118788492364802, |
|
"eval_loss": 3.3674731254577637, |
|
"eval_runtime": 2.7234, |
|
"eval_samples_per_second": 175.884, |
|
"eval_steps_per_second": 1.469, |
|
"step": 44675 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.2270423474491496e-05, |
|
"loss": 3.3274, |
|
"step": 46462 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.41535205305488926, |
|
"eval_loss": 3.356222629547119, |
|
"eval_runtime": 2.6874, |
|
"eval_samples_per_second": 178.241, |
|
"eval_steps_per_second": 1.488, |
|
"step": 46462 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 4.197249083027676e-05, |
|
"loss": 3.3161, |
|
"step": 48249 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.41593196314670444, |
|
"eval_loss": 3.3487422466278076, |
|
"eval_runtime": 2.6979, |
|
"eval_samples_per_second": 177.545, |
|
"eval_steps_per_second": 1.483, |
|
"step": 48249 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 4.167455818606202e-05, |
|
"loss": 3.3073, |
|
"step": 50036 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.4159050645369928, |
|
"eval_loss": 3.329265832901001, |
|
"eval_runtime": 2.6601, |
|
"eval_samples_per_second": 180.065, |
|
"eval_steps_per_second": 1.504, |
|
"step": 50036 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 4.137662554184728e-05, |
|
"loss": 3.2991, |
|
"step": 51823 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.41597842575425586, |
|
"eval_loss": 3.3317177295684814, |
|
"eval_runtime": 2.632, |
|
"eval_samples_per_second": 181.991, |
|
"eval_steps_per_second": 1.52, |
|
"step": 51823 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.107869289763255e-05, |
|
"loss": 3.2899, |
|
"step": 53610 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.4182503044549548, |
|
"eval_loss": 3.3058271408081055, |
|
"eval_runtime": 2.6549, |
|
"eval_samples_per_second": 180.418, |
|
"eval_steps_per_second": 1.507, |
|
"step": 53610 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 4.078076025341781e-05, |
|
"loss": 3.2814, |
|
"step": 55397 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.42345443682608325, |
|
"eval_loss": 3.2795028686523438, |
|
"eval_runtime": 2.6768, |
|
"eval_samples_per_second": 178.945, |
|
"eval_steps_per_second": 1.494, |
|
"step": 55397 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 4.0482827609203076e-05, |
|
"loss": 3.2734, |
|
"step": 57184 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.4142510746169955, |
|
"eval_loss": 3.318493366241455, |
|
"eval_runtime": 2.6511, |
|
"eval_samples_per_second": 180.679, |
|
"eval_steps_per_second": 1.509, |
|
"step": 57184 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 4.018489496498833e-05, |
|
"loss": 3.266, |
|
"step": 58971 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.4268454481298518, |
|
"eval_loss": 3.268219232559204, |
|
"eval_runtime": 2.8163, |
|
"eval_samples_per_second": 170.079, |
|
"eval_steps_per_second": 1.42, |
|
"step": 58971 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 3.988696232077359e-05, |
|
"loss": 3.2578, |
|
"step": 60758 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.4180501984459724, |
|
"eval_loss": 3.3144567012786865, |
|
"eval_runtime": 2.6553, |
|
"eval_samples_per_second": 180.397, |
|
"eval_steps_per_second": 1.506, |
|
"step": 60758 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 3.9589029676558855e-05, |
|
"loss": 3.2506, |
|
"step": 62545 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.423013128672233, |
|
"eval_loss": 3.272632598876953, |
|
"eval_runtime": 2.6457, |
|
"eval_samples_per_second": 181.048, |
|
"eval_steps_per_second": 1.512, |
|
"step": 62545 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 3.9291097032344115e-05, |
|
"loss": 3.2423, |
|
"step": 64332 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.42176717460496294, |
|
"eval_loss": 3.273455858230591, |
|
"eval_runtime": 2.6217, |
|
"eval_samples_per_second": 182.708, |
|
"eval_steps_per_second": 1.526, |
|
"step": 64332 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 3.8993164388129375e-05, |
|
"loss": 3.2359, |
|
"step": 66119 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.417536709284797, |
|
"eval_loss": 3.284508228302002, |
|
"eval_runtime": 2.7995, |
|
"eval_samples_per_second": 171.1, |
|
"eval_steps_per_second": 1.429, |
|
"step": 66119 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 3.869523174391464e-05, |
|
"loss": 3.2293, |
|
"step": 67906 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.4193099997219983, |
|
"eval_loss": 3.306713581085205, |
|
"eval_runtime": 2.606, |
|
"eval_samples_per_second": 183.804, |
|
"eval_steps_per_second": 1.535, |
|
"step": 67906 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 3.83972990996999e-05, |
|
"loss": 3.2207, |
|
"step": 69693 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.42573705623362873, |
|
"eval_loss": 3.2586092948913574, |
|
"eval_runtime": 2.8824, |
|
"eval_samples_per_second": 166.182, |
|
"eval_steps_per_second": 1.388, |
|
"step": 69693 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.809936645548516e-05, |
|
"loss": 3.2138, |
|
"step": 71480 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.4249552472588946, |
|
"eval_loss": 3.2543227672576904, |
|
"eval_runtime": 2.8002, |
|
"eval_samples_per_second": 171.061, |
|
"eval_steps_per_second": 1.428, |
|
"step": 71480 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 3.780143381127043e-05, |
|
"loss": 3.2077, |
|
"step": 73267 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.42259816679868734, |
|
"eval_loss": 3.239523410797119, |
|
"eval_runtime": 2.8097, |
|
"eval_samples_per_second": 170.48, |
|
"eval_steps_per_second": 1.424, |
|
"step": 73267 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 3.750350116705569e-05, |
|
"loss": 3.202, |
|
"step": 75054 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.4270068903702874, |
|
"eval_loss": 3.222418785095215, |
|
"eval_runtime": 2.7468, |
|
"eval_samples_per_second": 174.388, |
|
"eval_steps_per_second": 1.456, |
|
"step": 75054 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 3.720556852284095e-05, |
|
"loss": 3.1964, |
|
"step": 76841 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.4233582733012544, |
|
"eval_loss": 3.256213426589966, |
|
"eval_runtime": 2.7921, |
|
"eval_samples_per_second": 171.556, |
|
"eval_steps_per_second": 1.433, |
|
"step": 76841 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 3.6907635878626214e-05, |
|
"loss": 3.1925, |
|
"step": 78628 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.4251221214235869, |
|
"eval_loss": 3.2543723583221436, |
|
"eval_runtime": 2.7441, |
|
"eval_samples_per_second": 174.557, |
|
"eval_steps_per_second": 1.458, |
|
"step": 78628 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 3.660970323441147e-05, |
|
"loss": 3.1865, |
|
"step": 80415 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.43534812547819435, |
|
"eval_loss": 3.204282522201538, |
|
"eval_runtime": 2.8747, |
|
"eval_samples_per_second": 166.629, |
|
"eval_steps_per_second": 1.391, |
|
"step": 80415 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 3.631177059019673e-05, |
|
"loss": 3.1812, |
|
"step": 82202 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.42858336117607754, |
|
"eval_loss": 3.2280006408691406, |
|
"eval_runtime": 2.742, |
|
"eval_samples_per_second": 174.687, |
|
"eval_steps_per_second": 1.459, |
|
"step": 82202 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 3.601383794598199e-05, |
|
"loss": 3.1744, |
|
"step": 83989 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.427574415606256, |
|
"eval_loss": 3.21743106842041, |
|
"eval_runtime": 2.7215, |
|
"eval_samples_per_second": 176.009, |
|
"eval_steps_per_second": 1.47, |
|
"step": 83989 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 3.571590530176725e-05, |
|
"loss": 3.1699, |
|
"step": 85776 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.4317117218804673, |
|
"eval_loss": 3.1971514225006104, |
|
"eval_runtime": 2.9267, |
|
"eval_samples_per_second": 163.664, |
|
"eval_steps_per_second": 1.367, |
|
"step": 85776 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 3.541797265755252e-05, |
|
"loss": 3.1652, |
|
"step": 87563 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.43024569330697543, |
|
"eval_loss": 3.201594114303589, |
|
"eval_runtime": 2.8821, |
|
"eval_samples_per_second": 166.197, |
|
"eval_steps_per_second": 1.388, |
|
"step": 87563 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 3.512004001333778e-05, |
|
"loss": 3.1609, |
|
"step": 89350 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.4337845459672871, |
|
"eval_loss": 3.201768636703491, |
|
"eval_runtime": 2.7518, |
|
"eval_samples_per_second": 174.066, |
|
"eval_steps_per_second": 1.454, |
|
"step": 89350 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 3.482210736912304e-05, |
|
"loss": 3.1548, |
|
"step": 91137 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.43272542487719495, |
|
"eval_loss": 3.1950149536132812, |
|
"eval_runtime": 2.6564, |
|
"eval_samples_per_second": 180.321, |
|
"eval_steps_per_second": 1.506, |
|
"step": 91137 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 3.4524174724908306e-05, |
|
"loss": 3.1508, |
|
"step": 92924 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.42785334961519894, |
|
"eval_loss": 3.212803363800049, |
|
"eval_runtime": 2.8096, |
|
"eval_samples_per_second": 170.486, |
|
"eval_steps_per_second": 1.424, |
|
"step": 92924 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 3.4226242080693565e-05, |
|
"loss": 3.1478, |
|
"step": 94711 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.4303176427710337, |
|
"eval_loss": 3.202669382095337, |
|
"eval_runtime": 2.727, |
|
"eval_samples_per_second": 175.65, |
|
"eval_steps_per_second": 1.467, |
|
"step": 94711 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 3.3928309436478825e-05, |
|
"loss": 3.1423, |
|
"step": 96498 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.43117465657415194, |
|
"eval_loss": 3.195861577987671, |
|
"eval_runtime": 2.7557, |
|
"eval_samples_per_second": 173.823, |
|
"eval_steps_per_second": 1.452, |
|
"step": 96498 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 3.363037679226409e-05, |
|
"loss": 3.1383, |
|
"step": 98285 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.4340061025109873, |
|
"eval_loss": 3.1911423206329346, |
|
"eval_runtime": 2.754, |
|
"eval_samples_per_second": 173.929, |
|
"eval_steps_per_second": 1.452, |
|
"step": 98285 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.333244414804935e-05, |
|
"loss": 3.1336, |
|
"step": 100072 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.43203597021216805, |
|
"eval_loss": 3.1913583278656006, |
|
"eval_runtime": 2.6171, |
|
"eval_samples_per_second": 183.026, |
|
"eval_steps_per_second": 1.528, |
|
"step": 100072 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 3.303451150383461e-05, |
|
"loss": 3.129, |
|
"step": 101859 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.43117934403349617, |
|
"eval_loss": 3.185468912124634, |
|
"eval_runtime": 2.8993, |
|
"eval_samples_per_second": 165.21, |
|
"eval_steps_per_second": 1.38, |
|
"step": 101859 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 3.273657885961988e-05, |
|
"loss": 3.1233, |
|
"step": 103646 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.4336863338376198, |
|
"eval_loss": 3.1569876670837402, |
|
"eval_runtime": 2.7946, |
|
"eval_samples_per_second": 171.403, |
|
"eval_steps_per_second": 1.431, |
|
"step": 103646 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 3.243864621540514e-05, |
|
"loss": 3.1198, |
|
"step": 105433 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.4307147438578878, |
|
"eval_loss": 3.2042369842529297, |
|
"eval_runtime": 2.9712, |
|
"eval_samples_per_second": 161.213, |
|
"eval_steps_per_second": 1.346, |
|
"step": 105433 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 3.21407135711904e-05, |
|
"loss": 3.1153, |
|
"step": 107220 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.4389830036996857, |
|
"eval_loss": 3.1370162963867188, |
|
"eval_runtime": 2.6328, |
|
"eval_samples_per_second": 181.937, |
|
"eval_steps_per_second": 1.519, |
|
"step": 107220 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 3.1842780926975664e-05, |
|
"loss": 3.1122, |
|
"step": 109007 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.4412110687878109, |
|
"eval_loss": 3.1612205505371094, |
|
"eval_runtime": 2.8382, |
|
"eval_samples_per_second": 168.771, |
|
"eval_steps_per_second": 1.409, |
|
"step": 109007 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 3.154484828276092e-05, |
|
"loss": 3.1093, |
|
"step": 110794 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.4347947500698129, |
|
"eval_loss": 3.164184331893921, |
|
"eval_runtime": 2.8661, |
|
"eval_samples_per_second": 167.128, |
|
"eval_steps_per_second": 1.396, |
|
"step": 110794 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 3.1246915638546184e-05, |
|
"loss": 3.1048, |
|
"step": 112581 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.4325971058644326, |
|
"eval_loss": 3.1806650161743164, |
|
"eval_runtime": 2.8065, |
|
"eval_samples_per_second": 170.676, |
|
"eval_steps_per_second": 1.425, |
|
"step": 112581 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 3.0948982994331444e-05, |
|
"loss": 3.1013, |
|
"step": 114368 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.4358816103240286, |
|
"eval_loss": 3.144869089126587, |
|
"eval_runtime": 2.6572, |
|
"eval_samples_per_second": 180.267, |
|
"eval_steps_per_second": 1.505, |
|
"step": 114368 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 3.06510503501167e-05, |
|
"loss": 3.0977, |
|
"step": 116155 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.43795185846729606, |
|
"eval_loss": 3.1408073902130127, |
|
"eval_runtime": 2.8767, |
|
"eval_samples_per_second": 166.508, |
|
"eval_steps_per_second": 1.39, |
|
"step": 116155 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 3.0353117705901967e-05, |
|
"loss": 3.0926, |
|
"step": 117942 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.43651985318651987, |
|
"eval_loss": 3.172287702560425, |
|
"eval_runtime": 2.6811, |
|
"eval_samples_per_second": 178.655, |
|
"eval_steps_per_second": 1.492, |
|
"step": 117942 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 3.005518506168723e-05, |
|
"loss": 3.0901, |
|
"step": 119729 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.43799220873854433, |
|
"eval_loss": 3.1473004817962646, |
|
"eval_runtime": 2.8395, |
|
"eval_samples_per_second": 168.689, |
|
"eval_steps_per_second": 1.409, |
|
"step": 119729 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 2.9757252417472493e-05, |
|
"loss": 3.0882, |
|
"step": 121516 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.43776389123458875, |
|
"eval_loss": 3.140061378479004, |
|
"eval_runtime": 2.8036, |
|
"eval_samples_per_second": 170.853, |
|
"eval_steps_per_second": 1.427, |
|
"step": 121516 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 2.9459319773257753e-05, |
|
"loss": 3.0839, |
|
"step": 123303 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.43737838066499335, |
|
"eval_loss": 3.12809157371521, |
|
"eval_runtime": 2.7834, |
|
"eval_samples_per_second": 172.089, |
|
"eval_steps_per_second": 1.437, |
|
"step": 123303 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 2.9161387129043016e-05, |
|
"loss": 3.0794, |
|
"step": 125090 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.436696631101403, |
|
"eval_loss": 3.1356077194213867, |
|
"eval_runtime": 2.7463, |
|
"eval_samples_per_second": 174.417, |
|
"eval_steps_per_second": 1.457, |
|
"step": 125090 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 2.886345448482828e-05, |
|
"loss": 3.0766, |
|
"step": 126877 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.4397433382835047, |
|
"eval_loss": 3.1019225120544434, |
|
"eval_runtime": 2.7446, |
|
"eval_samples_per_second": 174.525, |
|
"eval_steps_per_second": 1.457, |
|
"step": 126877 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 2.856552184061354e-05, |
|
"loss": 3.074, |
|
"step": 128664 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.43546703529212627, |
|
"eval_loss": 3.162637948989868, |
|
"eval_runtime": 2.7735, |
|
"eval_samples_per_second": 172.704, |
|
"eval_steps_per_second": 1.442, |
|
"step": 128664 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 2.8267589196398802e-05, |
|
"loss": 3.0702, |
|
"step": 130451 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.4387263241465351, |
|
"eval_loss": 3.1287012100219727, |
|
"eval_runtime": 2.7531, |
|
"eval_samples_per_second": 173.986, |
|
"eval_steps_per_second": 1.453, |
|
"step": 130451 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 2.7969656552184065e-05, |
|
"loss": 3.0676, |
|
"step": 132238 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.43789707310306564, |
|
"eval_loss": 3.136582136154175, |
|
"eval_runtime": 2.7895, |
|
"eval_samples_per_second": 171.718, |
|
"eval_steps_per_second": 1.434, |
|
"step": 132238 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.767172390796933e-05, |
|
"loss": 3.0648, |
|
"step": 134025 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.43458386755114553, |
|
"eval_loss": 3.178231716156006, |
|
"eval_runtime": 2.7934, |
|
"eval_samples_per_second": 171.478, |
|
"eval_steps_per_second": 1.432, |
|
"step": 134025 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 2.7373791263754585e-05, |
|
"loss": 3.0624, |
|
"step": 135812 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.44267685994547346, |
|
"eval_loss": 3.1229066848754883, |
|
"eval_runtime": 2.7648, |
|
"eval_samples_per_second": 173.25, |
|
"eval_steps_per_second": 1.447, |
|
"step": 135812 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 2.7075858619539845e-05, |
|
"loss": 3.0575, |
|
"step": 137599 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.44304284497210544, |
|
"eval_loss": 3.11391282081604, |
|
"eval_runtime": 2.8016, |
|
"eval_samples_per_second": 170.976, |
|
"eval_steps_per_second": 1.428, |
|
"step": 137599 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 2.6777925975325108e-05, |
|
"loss": 3.0549, |
|
"step": 139386 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.4431432021793468, |
|
"eval_loss": 3.0947771072387695, |
|
"eval_runtime": 2.8076, |
|
"eval_samples_per_second": 170.609, |
|
"eval_steps_per_second": 1.425, |
|
"step": 139386 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 2.647999333111037e-05, |
|
"loss": 3.052, |
|
"step": 141173 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.4451516602360098, |
|
"eval_loss": 3.1029744148254395, |
|
"eval_runtime": 2.6939, |
|
"eval_samples_per_second": 177.81, |
|
"eval_steps_per_second": 1.485, |
|
"step": 141173 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 2.618206068689563e-05, |
|
"loss": 3.0527, |
|
"step": 142960 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.4447775411856999, |
|
"eval_loss": 3.0929136276245117, |
|
"eval_runtime": 2.7294, |
|
"eval_samples_per_second": 175.498, |
|
"eval_steps_per_second": 1.466, |
|
"step": 142960 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 2.5884128042680894e-05, |
|
"loss": 3.0466, |
|
"step": 144747 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.44276007765216396, |
|
"eval_loss": 3.08884334564209, |
|
"eval_runtime": 2.9041, |
|
"eval_samples_per_second": 164.938, |
|
"eval_steps_per_second": 1.377, |
|
"step": 144747 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 2.5586195398466157e-05, |
|
"loss": 3.0439, |
|
"step": 146534 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.44138026993182133, |
|
"eval_loss": 3.103548765182495, |
|
"eval_runtime": 2.7889, |
|
"eval_samples_per_second": 171.752, |
|
"eval_steps_per_second": 1.434, |
|
"step": 146534 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 2.5288262754251417e-05, |
|
"loss": 3.0409, |
|
"step": 148321 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.4411418975650714, |
|
"eval_loss": 3.1111607551574707, |
|
"eval_runtime": 2.7759, |
|
"eval_samples_per_second": 172.554, |
|
"eval_steps_per_second": 1.441, |
|
"step": 148321 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 2.499033011003668e-05, |
|
"loss": 3.041, |
|
"step": 150108 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.43987652588747017, |
|
"eval_loss": 3.129586696624756, |
|
"eval_runtime": 2.8127, |
|
"eval_samples_per_second": 170.3, |
|
"eval_steps_per_second": 1.422, |
|
"step": 150108 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 2.4692397465821944e-05, |
|
"loss": 3.0379, |
|
"step": 151895 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.44284645440186216, |
|
"eval_loss": 3.122410297393799, |
|
"eval_runtime": 2.8313, |
|
"eval_samples_per_second": 169.183, |
|
"eval_steps_per_second": 1.413, |
|
"step": 151895 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 2.4394464821607203e-05, |
|
"loss": 3.0332, |
|
"step": 153682 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.43977575332866153, |
|
"eval_loss": 3.1100828647613525, |
|
"eval_runtime": 2.7772, |
|
"eval_samples_per_second": 172.476, |
|
"eval_steps_per_second": 1.44, |
|
"step": 153682 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 2.4096532177392463e-05, |
|
"loss": 3.0315, |
|
"step": 155469 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.4423447851731753, |
|
"eval_loss": 3.1045453548431396, |
|
"eval_runtime": 2.6529, |
|
"eval_samples_per_second": 180.554, |
|
"eval_steps_per_second": 1.508, |
|
"step": 155469 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 2.3798599533177726e-05, |
|
"loss": 3.0302, |
|
"step": 157256 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.4446285634040163, |
|
"eval_loss": 3.0912954807281494, |
|
"eval_runtime": 2.7592, |
|
"eval_samples_per_second": 173.599, |
|
"eval_steps_per_second": 1.45, |
|
"step": 157256 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 2.350066688896299e-05, |
|
"loss": 3.0265, |
|
"step": 159043 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.44469112504856523, |
|
"eval_loss": 3.074544906616211, |
|
"eval_runtime": 2.879, |
|
"eval_samples_per_second": 166.379, |
|
"eval_steps_per_second": 1.389, |
|
"step": 159043 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 2.320273424474825e-05, |
|
"loss": 3.0243, |
|
"step": 160830 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.4443120079831467, |
|
"eval_loss": 3.0942282676696777, |
|
"eval_runtime": 2.7539, |
|
"eval_samples_per_second": 173.935, |
|
"eval_steps_per_second": 1.452, |
|
"step": 160830 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 2.2904801600533513e-05, |
|
"loss": 3.0222, |
|
"step": 162617 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.4432415712960099, |
|
"eval_loss": 3.0821101665496826, |
|
"eval_runtime": 2.9525, |
|
"eval_samples_per_second": 162.233, |
|
"eval_steps_per_second": 1.355, |
|
"step": 162617 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 2.2606868956318776e-05, |
|
"loss": 3.021, |
|
"step": 164404 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.4472506123828026, |
|
"eval_loss": 3.0616304874420166, |
|
"eval_runtime": 2.7386, |
|
"eval_samples_per_second": 174.904, |
|
"eval_steps_per_second": 1.461, |
|
"step": 164404 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 2.2308936312104036e-05, |
|
"loss": 3.0183, |
|
"step": 166191 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.4450408140445041, |
|
"eval_loss": 3.102149724960327, |
|
"eval_runtime": 2.7638, |
|
"eval_samples_per_second": 173.309, |
|
"eval_steps_per_second": 1.447, |
|
"step": 166191 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 2.2011003667889295e-05, |
|
"loss": 3.0155, |
|
"step": 167978 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.44215097425485717, |
|
"eval_loss": 3.116255283355713, |
|
"eval_runtime": 2.7634, |
|
"eval_samples_per_second": 173.336, |
|
"eval_steps_per_second": 1.447, |
|
"step": 167978 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 2.171307102367456e-05, |
|
"loss": 3.0132, |
|
"step": 169765 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.4493496925561758, |
|
"eval_loss": 3.0645430088043213, |
|
"eval_runtime": 2.9971, |
|
"eval_samples_per_second": 159.823, |
|
"eval_steps_per_second": 1.335, |
|
"step": 169765 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 2.1415138379459822e-05, |
|
"loss": 3.0118, |
|
"step": 171552 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.44202857943401513, |
|
"eval_loss": 3.092226982116699, |
|
"eval_runtime": 2.7558, |
|
"eval_samples_per_second": 173.815, |
|
"eval_steps_per_second": 1.451, |
|
"step": 171552 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 2.111720573524508e-05, |
|
"loss": 3.0105, |
|
"step": 173339 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.4422707131510743, |
|
"eval_loss": 3.118738889694214, |
|
"eval_runtime": 2.8983, |
|
"eval_samples_per_second": 165.268, |
|
"eval_steps_per_second": 1.38, |
|
"step": 173339 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 2.0819273091030345e-05, |
|
"loss": 3.0063, |
|
"step": 175126 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.44618075143856484, |
|
"eval_loss": 3.1060523986816406, |
|
"eval_runtime": 2.8061, |
|
"eval_samples_per_second": 170.697, |
|
"eval_steps_per_second": 1.425, |
|
"step": 175126 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 2.0521340446815608e-05, |
|
"loss": 3.0035, |
|
"step": 176913 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.4423852183650616, |
|
"eval_loss": 3.1097826957702637, |
|
"eval_runtime": 3.1412, |
|
"eval_samples_per_second": 152.491, |
|
"eval_steps_per_second": 1.273, |
|
"step": 176913 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.0223407802600868e-05, |
|
"loss": 3.0025, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.4454137587238285, |
|
"eval_loss": 3.0856051445007324, |
|
"eval_runtime": 2.8018, |
|
"eval_samples_per_second": 170.961, |
|
"eval_steps_per_second": 1.428, |
|
"step": 178700 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 1.992547515838613e-05, |
|
"loss": 3.0001, |
|
"step": 180487 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.4503842392063714, |
|
"eval_loss": 3.0584394931793213, |
|
"eval_runtime": 2.8738, |
|
"eval_samples_per_second": 166.676, |
|
"eval_steps_per_second": 1.392, |
|
"step": 180487 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 1.962754251417139e-05, |
|
"loss": 2.9979, |
|
"step": 182274 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.4435431811165062, |
|
"eval_loss": 3.089740753173828, |
|
"eval_runtime": 3.0249, |
|
"eval_samples_per_second": 158.354, |
|
"eval_steps_per_second": 1.322, |
|
"step": 182274 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 1.9329609869956654e-05, |
|
"loss": 2.9963, |
|
"step": 184061 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.4437190013170062, |
|
"eval_loss": 3.0712223052978516, |
|
"eval_runtime": 2.8272, |
|
"eval_samples_per_second": 169.429, |
|
"eval_steps_per_second": 1.415, |
|
"step": 184061 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 1.9031677225741914e-05, |
|
"loss": 2.9944, |
|
"step": 185848 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.445785838620922, |
|
"eval_loss": 3.085341453552246, |
|
"eval_runtime": 2.8252, |
|
"eval_samples_per_second": 169.547, |
|
"eval_steps_per_second": 1.416, |
|
"step": 185848 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 1.8733744581527177e-05, |
|
"loss": 2.9931, |
|
"step": 187635 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.44751641410643545, |
|
"eval_loss": 3.0809359550476074, |
|
"eval_runtime": 2.5855, |
|
"eval_samples_per_second": 185.266, |
|
"eval_steps_per_second": 1.547, |
|
"step": 187635 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 1.843581193731244e-05, |
|
"loss": 2.992, |
|
"step": 189422 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.44257175014068656, |
|
"eval_loss": 3.0910277366638184, |
|
"eval_runtime": 2.6969, |
|
"eval_samples_per_second": 177.61, |
|
"eval_steps_per_second": 1.483, |
|
"step": 189422 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 1.81378792930977e-05, |
|
"loss": 2.9886, |
|
"step": 191209 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.44900508104118764, |
|
"eval_loss": 3.0693163871765137, |
|
"eval_runtime": 2.7589, |
|
"eval_samples_per_second": 173.62, |
|
"eval_steps_per_second": 1.45, |
|
"step": 191209 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 1.7839946648882963e-05, |
|
"loss": 2.986, |
|
"step": 192996 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.4444630171333055, |
|
"eval_loss": 3.0906262397766113, |
|
"eval_runtime": 3.2348, |
|
"eval_samples_per_second": 148.078, |
|
"eval_steps_per_second": 1.237, |
|
"step": 192996 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 1.7542014004668223e-05, |
|
"loss": 2.9834, |
|
"step": 194783 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.4537605471925546, |
|
"eval_loss": 3.0319700241088867, |
|
"eval_runtime": 3.1336, |
|
"eval_samples_per_second": 152.86, |
|
"eval_steps_per_second": 1.276, |
|
"step": 194783 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 1.7244081360453486e-05, |
|
"loss": 2.9829, |
|
"step": 196570 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.44560168895144264, |
|
"eval_loss": 3.0759708881378174, |
|
"eval_runtime": 2.6847, |
|
"eval_samples_per_second": 178.418, |
|
"eval_steps_per_second": 1.49, |
|
"step": 196570 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 1.6946148716238746e-05, |
|
"loss": 2.9814, |
|
"step": 198357 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.45036670646426746, |
|
"eval_loss": 3.0422720909118652, |
|
"eval_runtime": 2.9828, |
|
"eval_samples_per_second": 160.59, |
|
"eval_steps_per_second": 1.341, |
|
"step": 198357 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 1.664821607202401e-05, |
|
"loss": 2.9795, |
|
"step": 200144 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.452860867371254, |
|
"eval_loss": 3.0410661697387695, |
|
"eval_runtime": 2.9476, |
|
"eval_samples_per_second": 162.504, |
|
"eval_steps_per_second": 1.357, |
|
"step": 200144 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 1.6350283427809272e-05, |
|
"loss": 2.979, |
|
"step": 201931 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.4462770034348653, |
|
"eval_loss": 3.0784435272216797, |
|
"eval_runtime": 3.0447, |
|
"eval_samples_per_second": 157.321, |
|
"eval_steps_per_second": 1.314, |
|
"step": 201931 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 1.6052350783594532e-05, |
|
"loss": 2.9781, |
|
"step": 203718 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.453679165491965, |
|
"eval_loss": 3.0525529384613037, |
|
"eval_runtime": 2.985, |
|
"eval_samples_per_second": 160.472, |
|
"eval_steps_per_second": 1.34, |
|
"step": 203718 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 1.5754418139379795e-05, |
|
"loss": 2.9751, |
|
"step": 205505 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.4512479201331115, |
|
"eval_loss": 3.0478885173797607, |
|
"eval_runtime": 3.2329, |
|
"eval_samples_per_second": 148.165, |
|
"eval_steps_per_second": 1.237, |
|
"step": 205505 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 1.5456485495165055e-05, |
|
"loss": 2.9749, |
|
"step": 207292 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.44932773109243695, |
|
"eval_loss": 3.054450511932373, |
|
"eval_runtime": 2.6607, |
|
"eval_samples_per_second": 180.027, |
|
"eval_steps_per_second": 1.503, |
|
"step": 207292 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 1.5158552850950317e-05, |
|
"loss": 2.9735, |
|
"step": 209079 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.44851187971612105, |
|
"eval_loss": 3.0529181957244873, |
|
"eval_runtime": 2.6442, |
|
"eval_samples_per_second": 181.151, |
|
"eval_steps_per_second": 1.513, |
|
"step": 209079 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 1.4860620206735578e-05, |
|
"loss": 2.9705, |
|
"step": 210866 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.4580500769338369, |
|
"eval_loss": 3.008023977279663, |
|
"eval_runtime": 2.8363, |
|
"eval_samples_per_second": 168.883, |
|
"eval_steps_per_second": 1.41, |
|
"step": 210866 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 1.4562687562520841e-05, |
|
"loss": 2.9698, |
|
"step": 212653 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.4536538892624972, |
|
"eval_loss": 3.0271081924438477, |
|
"eval_runtime": 2.5889, |
|
"eval_samples_per_second": 185.019, |
|
"eval_steps_per_second": 1.545, |
|
"step": 212653 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 1.4264754918306103e-05, |
|
"loss": 2.9674, |
|
"step": 214440 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.4481896208719377, |
|
"eval_loss": 3.0476975440979004, |
|
"eval_runtime": 2.6869, |
|
"eval_samples_per_second": 178.272, |
|
"eval_steps_per_second": 1.489, |
|
"step": 214440 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 1.3966822274091364e-05, |
|
"loss": 2.9666, |
|
"step": 216227 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.45575815202192516, |
|
"eval_loss": 3.032849073410034, |
|
"eval_runtime": 2.9433, |
|
"eval_samples_per_second": 162.744, |
|
"eval_steps_per_second": 1.359, |
|
"step": 216227 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 1.3668889629876628e-05, |
|
"loss": 2.9664, |
|
"step": 218014 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.4462809917355372, |
|
"eval_loss": 3.068892240524292, |
|
"eval_runtime": 2.5675, |
|
"eval_samples_per_second": 186.559, |
|
"eval_steps_per_second": 1.558, |
|
"step": 218014 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 1.3370956985661887e-05, |
|
"loss": 2.9639, |
|
"step": 219801 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.44586561487533455, |
|
"eval_loss": 3.0748960971832275, |
|
"eval_runtime": 2.796, |
|
"eval_samples_per_second": 171.315, |
|
"eval_steps_per_second": 1.431, |
|
"step": 219801 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 1.3073024341447149e-05, |
|
"loss": 2.9633, |
|
"step": 221588 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.4488954922628392, |
|
"eval_loss": 3.0504775047302246, |
|
"eval_runtime": 2.7162, |
|
"eval_samples_per_second": 176.347, |
|
"eval_steps_per_second": 1.473, |
|
"step": 221588 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 1.277509169723241e-05, |
|
"loss": 2.9618, |
|
"step": 223375 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.45345192714951293, |
|
"eval_loss": 3.025569438934326, |
|
"eval_runtime": 2.7094, |
|
"eval_samples_per_second": 176.794, |
|
"eval_steps_per_second": 1.476, |
|
"step": 223375 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 1.2477159053017674e-05, |
|
"loss": 2.9589, |
|
"step": 225162 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.4495620785180625, |
|
"eval_loss": 3.052183151245117, |
|
"eval_runtime": 2.6479, |
|
"eval_samples_per_second": 180.901, |
|
"eval_steps_per_second": 1.511, |
|
"step": 225162 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 1.2179226408802935e-05, |
|
"loss": 2.9584, |
|
"step": 226949 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.45301326453349783, |
|
"eval_loss": 3.0450875759124756, |
|
"eval_runtime": 2.6347, |
|
"eval_samples_per_second": 181.807, |
|
"eval_steps_per_second": 1.518, |
|
"step": 226949 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 1.1881293764588197e-05, |
|
"loss": 2.9589, |
|
"step": 228736 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.4501640457065279, |
|
"eval_loss": 3.065441846847534, |
|
"eval_runtime": 2.6108, |
|
"eval_samples_per_second": 183.467, |
|
"eval_steps_per_second": 1.532, |
|
"step": 228736 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 1.1583361120373458e-05, |
|
"loss": 2.9581, |
|
"step": 230523 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.45803146932307, |
|
"eval_loss": 2.998933792114258, |
|
"eval_runtime": 2.5885, |
|
"eval_samples_per_second": 185.049, |
|
"eval_steps_per_second": 1.545, |
|
"step": 230523 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 1.128542847615872e-05, |
|
"loss": 2.9554, |
|
"step": 232310 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.45082426825165417, |
|
"eval_loss": 3.0346689224243164, |
|
"eval_runtime": 2.6119, |
|
"eval_samples_per_second": 183.394, |
|
"eval_steps_per_second": 1.531, |
|
"step": 232310 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 1.0987495831943983e-05, |
|
"loss": 2.9565, |
|
"step": 234097 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.44976425684777727, |
|
"eval_loss": 3.0585811138153076, |
|
"eval_runtime": 2.5925, |
|
"eval_samples_per_second": 184.76, |
|
"eval_steps_per_second": 1.543, |
|
"step": 234097 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 1.0689563187729243e-05, |
|
"loss": 2.9548, |
|
"step": 235884 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.4535522171743917, |
|
"eval_loss": 3.016965389251709, |
|
"eval_runtime": 2.5996, |
|
"eval_samples_per_second": 184.261, |
|
"eval_steps_per_second": 1.539, |
|
"step": 235884 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 1.0391630543514506e-05, |
|
"loss": 2.9515, |
|
"step": 237671 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.44919200987543484, |
|
"eval_loss": 3.046969175338745, |
|
"eval_runtime": 2.7591, |
|
"eval_samples_per_second": 173.609, |
|
"eval_steps_per_second": 1.45, |
|
"step": 237671 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 1.0093697899299767e-05, |
|
"loss": 2.9499, |
|
"step": 239458 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.4514692787177204, |
|
"eval_loss": 3.033890724182129, |
|
"eval_runtime": 2.5902, |
|
"eval_samples_per_second": 184.93, |
|
"eval_steps_per_second": 1.544, |
|
"step": 239458 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 9.795765255085029e-06, |
|
"loss": 2.9514, |
|
"step": 241245 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.44730392156862747, |
|
"eval_loss": 3.047365665435791, |
|
"eval_runtime": 2.5837, |
|
"eval_samples_per_second": 185.39, |
|
"eval_steps_per_second": 1.548, |
|
"step": 241245 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 9.49783261087029e-06, |
|
"loss": 2.9486, |
|
"step": 243032 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.4492987983383054, |
|
"eval_loss": 3.0426554679870605, |
|
"eval_runtime": 2.6436, |
|
"eval_samples_per_second": 181.191, |
|
"eval_steps_per_second": 1.513, |
|
"step": 243032 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 9.199899966655552e-06, |
|
"loss": 2.9483, |
|
"step": 244819 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.45336896891264594, |
|
"eval_loss": 3.033630847930908, |
|
"eval_runtime": 2.6114, |
|
"eval_samples_per_second": 183.424, |
|
"eval_steps_per_second": 1.532, |
|
"step": 244819 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 8.901967322440815e-06, |
|
"loss": 2.9491, |
|
"step": 246606 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.4516002576092739, |
|
"eval_loss": 3.027387857437134, |
|
"eval_runtime": 2.5975, |
|
"eval_samples_per_second": 184.405, |
|
"eval_steps_per_second": 1.54, |
|
"step": 246606 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 8.604034678226076e-06, |
|
"loss": 2.9465, |
|
"step": 248393 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.45385626765882453, |
|
"eval_loss": 3.0354230403900146, |
|
"eval_runtime": 2.5982, |
|
"eval_samples_per_second": 184.359, |
|
"eval_steps_per_second": 1.54, |
|
"step": 248393 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 8.306102034011338e-06, |
|
"loss": 2.9447, |
|
"step": 250180 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.4525785166095551, |
|
"eval_loss": 3.013915538787842, |
|
"eval_runtime": 2.7656, |
|
"eval_samples_per_second": 173.197, |
|
"eval_steps_per_second": 1.446, |
|
"step": 250180 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 8.0081693897966e-06, |
|
"loss": 2.9449, |
|
"step": 251967 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.4547931354099374, |
|
"eval_loss": 3.016339063644409, |
|
"eval_runtime": 2.7347, |
|
"eval_samples_per_second": 175.155, |
|
"eval_steps_per_second": 1.463, |
|
"step": 251967 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 7.710236745581861e-06, |
|
"loss": 2.9439, |
|
"step": 253754 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.45336875423920414, |
|
"eval_loss": 3.0307540893554688, |
|
"eval_runtime": 2.688, |
|
"eval_samples_per_second": 178.198, |
|
"eval_steps_per_second": 1.488, |
|
"step": 253754 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 7.412304101367122e-06, |
|
"loss": 2.9435, |
|
"step": 255541 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.4578975260089811, |
|
"eval_loss": 3.0242276191711426, |
|
"eval_runtime": 2.663, |
|
"eval_samples_per_second": 179.874, |
|
"eval_steps_per_second": 1.502, |
|
"step": 255541 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 7.114371457152384e-06, |
|
"loss": 2.943, |
|
"step": 257328 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.45133795837462837, |
|
"eval_loss": 3.043705463409424, |
|
"eval_runtime": 2.7686, |
|
"eval_samples_per_second": 173.01, |
|
"eval_steps_per_second": 1.445, |
|
"step": 257328 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 6.816438812937646e-06, |
|
"loss": 2.943, |
|
"step": 259115 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.4544336352454434, |
|
"eval_loss": 3.022679328918457, |
|
"eval_runtime": 2.8352, |
|
"eval_samples_per_second": 168.947, |
|
"eval_steps_per_second": 1.411, |
|
"step": 259115 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 6.518506168722909e-06, |
|
"loss": 2.9403, |
|
"step": 260902 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.4477586597237944, |
|
"eval_loss": 3.0463666915893555, |
|
"eval_runtime": 2.8089, |
|
"eval_samples_per_second": 170.531, |
|
"eval_steps_per_second": 1.424, |
|
"step": 260902 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 6.22057352450817e-06, |
|
"loss": 2.9407, |
|
"step": 262689 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.4465163301412564, |
|
"eval_loss": 3.0717713832855225, |
|
"eval_runtime": 2.6924, |
|
"eval_samples_per_second": 177.907, |
|
"eval_steps_per_second": 1.486, |
|
"step": 262689 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 5.922640880293432e-06, |
|
"loss": 2.9397, |
|
"step": 264476 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.4487103426844083, |
|
"eval_loss": 3.0518863201141357, |
|
"eval_runtime": 2.6462, |
|
"eval_samples_per_second": 181.012, |
|
"eval_steps_per_second": 1.512, |
|
"step": 264476 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 5.624708236078693e-06, |
|
"loss": 2.9392, |
|
"step": 266263 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.45580133672641854, |
|
"eval_loss": 3.0162956714630127, |
|
"eval_runtime": 2.8146, |
|
"eval_samples_per_second": 170.186, |
|
"eval_steps_per_second": 1.421, |
|
"step": 266263 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 5.326775591863955e-06, |
|
"loss": 2.9377, |
|
"step": 268050 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.45181576114667726, |
|
"eval_loss": 3.015916585922241, |
|
"eval_runtime": 2.7189, |
|
"eval_samples_per_second": 176.176, |
|
"eval_steps_per_second": 1.471, |
|
"step": 268050 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 5.028842947649216e-06, |
|
"loss": 2.9386, |
|
"step": 269837 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.45453775343084657, |
|
"eval_loss": 3.0009806156158447, |
|
"eval_runtime": 2.8171, |
|
"eval_samples_per_second": 170.031, |
|
"eval_steps_per_second": 1.42, |
|
"step": 269837 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 4.7309103034344784e-06, |
|
"loss": 2.9391, |
|
"step": 271624 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.4530369705973036, |
|
"eval_loss": 3.034574270248413, |
|
"eval_runtime": 2.7339, |
|
"eval_samples_per_second": 175.207, |
|
"eval_steps_per_second": 1.463, |
|
"step": 271624 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 4.43297765921974e-06, |
|
"loss": 2.9364, |
|
"step": 273411 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.45406984833727565, |
|
"eval_loss": 3.00394606590271, |
|
"eval_runtime": 2.6952, |
|
"eval_samples_per_second": 177.722, |
|
"eval_steps_per_second": 1.484, |
|
"step": 273411 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 4.135045015005002e-06, |
|
"loss": 2.9359, |
|
"step": 275198 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.4519481963203824, |
|
"eval_loss": 3.041689157485962, |
|
"eval_runtime": 2.7367, |
|
"eval_samples_per_second": 175.028, |
|
"eval_steps_per_second": 1.462, |
|
"step": 275198 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 3.837112370790264e-06, |
|
"loss": 2.9359, |
|
"step": 276985 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.45435859894132224, |
|
"eval_loss": 3.0161099433898926, |
|
"eval_runtime": 2.74, |
|
"eval_samples_per_second": 174.817, |
|
"eval_steps_per_second": 1.46, |
|
"step": 276985 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 3.5391797265755257e-06, |
|
"loss": 2.936, |
|
"step": 278772 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.45336758628392715, |
|
"eval_loss": 3.0168893337249756, |
|
"eval_runtime": 2.8607, |
|
"eval_samples_per_second": 167.443, |
|
"eval_steps_per_second": 1.398, |
|
"step": 278772 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 3.241247082360787e-06, |
|
"loss": 2.9329, |
|
"step": 280559 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.4478249866434215, |
|
"eval_loss": 3.059436082839966, |
|
"eval_runtime": 2.856, |
|
"eval_samples_per_second": 167.714, |
|
"eval_steps_per_second": 1.401, |
|
"step": 280559 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 2.9433144381460487e-06, |
|
"loss": 2.9336, |
|
"step": 282346 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.45551214066424783, |
|
"eval_loss": 3.0264980792999268, |
|
"eval_runtime": 2.9513, |
|
"eval_samples_per_second": 162.304, |
|
"eval_steps_per_second": 1.355, |
|
"step": 282346 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 2.6453817939313106e-06, |
|
"loss": 2.9341, |
|
"step": 284133 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.4542034031560622, |
|
"eval_loss": 3.0276126861572266, |
|
"eval_runtime": 2.8692, |
|
"eval_samples_per_second": 166.946, |
|
"eval_steps_per_second": 1.394, |
|
"step": 284133 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.347449149716572e-06, |
|
"loss": 2.933, |
|
"step": 285920 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.4524090960261173, |
|
"eval_loss": 3.032360553741455, |
|
"eval_runtime": 2.7445, |
|
"eval_samples_per_second": 174.528, |
|
"eval_steps_per_second": 1.457, |
|
"step": 285920 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 2.049516505501834e-06, |
|
"loss": 2.9325, |
|
"step": 287707 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.4489003880983182, |
|
"eval_loss": 3.024897813796997, |
|
"eval_runtime": 2.949, |
|
"eval_samples_per_second": 162.427, |
|
"eval_steps_per_second": 1.356, |
|
"step": 287707 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 1.7515838612870958e-06, |
|
"loss": 2.932, |
|
"step": 289494 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.4518854201633658, |
|
"eval_loss": 3.0444188117980957, |
|
"eval_runtime": 3.2716, |
|
"eval_samples_per_second": 146.412, |
|
"eval_steps_per_second": 1.223, |
|
"step": 289494 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 1.4536512170723575e-06, |
|
"loss": 2.9334, |
|
"step": 291281 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.4493931664657903, |
|
"eval_loss": 3.0419514179229736, |
|
"eval_runtime": 3.072, |
|
"eval_samples_per_second": 155.926, |
|
"eval_steps_per_second": 1.302, |
|
"step": 291281 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 1.1557185728576192e-06, |
|
"loss": 2.9318, |
|
"step": 293068 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.454113304625021, |
|
"eval_loss": 2.9971697330474854, |
|
"eval_runtime": 3.0501, |
|
"eval_samples_per_second": 157.044, |
|
"eval_steps_per_second": 1.311, |
|
"step": 293068 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 8.57785928642881e-07, |
|
"loss": 2.9316, |
|
"step": 294855 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.4525653024759636, |
|
"eval_loss": 2.997295618057251, |
|
"eval_runtime": 2.7549, |
|
"eval_samples_per_second": 173.872, |
|
"eval_steps_per_second": 1.452, |
|
"step": 294855 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 5.598532844281427e-07, |
|
"loss": 2.9318, |
|
"step": 296642 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.45290194408968354, |
|
"eval_loss": 3.0388541221618652, |
|
"eval_runtime": 3.0075, |
|
"eval_samples_per_second": 159.266, |
|
"eval_steps_per_second": 1.33, |
|
"step": 296642 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 2.619206402134045e-07, |
|
"loss": 2.9301, |
|
"step": 298429 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.4556901408450704, |
|
"eval_loss": 3.013132095336914, |
|
"eval_runtime": 2.9991, |
|
"eval_samples_per_second": 159.713, |
|
"eval_steps_per_second": 1.334, |
|
"step": 298429 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"learning_rate": 0.0, |
|
"loss": 2.9291, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"eval_accuracy": 0.4548123706727812, |
|
"eval_loss": 3.006730794906616, |
|
"eval_runtime": 3.1035, |
|
"eval_samples_per_second": 154.341, |
|
"eval_steps_per_second": 1.289, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 167.88, |
|
"step": 300000, |
|
"total_flos": 3.687416469441741e+18, |
|
"train_loss": 3.1306004736328124, |
|
"train_runtime": 219827.3924, |
|
"train_samples_per_second": 174.683, |
|
"train_steps_per_second": 1.365 |
|
} |
|
], |
|
"max_steps": 300000, |
|
"num_train_epochs": 168, |
|
"total_flos": 3.687416469441741e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|