diff --git "a/regional_perception_checkpoint-6379/trainer_state.json" "b/regional_perception_checkpoint-6379/trainer_state.json" new file mode 100644--- /dev/null +++ "b/regional_perception_checkpoint-6379/trainer_state.json" @@ -0,0 +1,11550 @@ +{ + "best_metric": 0.58554959, + "best_model_checkpoint": "/data1/wjx/model/swift/output/v1d+v3_prompt/output/internvl2-26b/v0-20240810-170945/checkpoint-6379", + "epoch": 4.999706084059959, + "eval_steps": 1, + "global_step": 6379, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "acc": 0.49760315, + "epoch": 0.0, + "learning_rate": 1.567398119122257e-07, + "loss": 1.98409796, + "memory(GiB)": 54.93, + "step": 1, + "train_speed(iter/s)": 0.023783 + }, + { + "acc": 0.52513188, + "epoch": 0.0, + "learning_rate": 7.836990595611285e-07, + "loss": 1.79844522, + "memory(GiB)": 54.93, + "step": 5, + "train_speed(iter/s)": 0.024289 + }, + { + "acc": 0.52413821, + "epoch": 0.01, + "learning_rate": 1.567398119122257e-06, + "loss": 1.82612877, + "memory(GiB)": 58.46, + "step": 10, + "train_speed(iter/s)": 0.023965 + }, + { + "acc": 0.51515636, + "epoch": 0.01, + "learning_rate": 2.3510971786833857e-06, + "loss": 1.82043953, + "memory(GiB)": 58.46, + "step": 15, + "train_speed(iter/s)": 0.024073 + }, + { + "acc": 0.51839466, + "epoch": 0.02, + "learning_rate": 3.134796238244514e-06, + "loss": 1.80918102, + "memory(GiB)": 58.46, + "step": 20, + "train_speed(iter/s)": 0.024066 + }, + { + "acc": 0.51010175, + "epoch": 0.02, + "learning_rate": 3.9184952978056436e-06, + "loss": 1.8428627, + "memory(GiB)": 58.46, + "step": 25, + "train_speed(iter/s)": 0.024068 + }, + { + "acc": 0.516012, + "epoch": 0.02, + "learning_rate": 4.7021943573667714e-06, + "loss": 1.81627235, + "memory(GiB)": 58.46, + "step": 30, + "train_speed(iter/s)": 0.024143 + }, + { + "acc": 0.50783205, + "epoch": 0.03, + "learning_rate": 5.4858934169279e-06, + "loss": 1.87089043, + "memory(GiB)": 58.46, + "step": 35, + "train_speed(iter/s)": 0.024184 + }, + { + "acc": 0.52221713, + "epoch": 0.03, + "learning_rate": 6.269592476489028e-06, + "loss": 1.78321209, + "memory(GiB)": 58.46, + "step": 40, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.53826532, + "epoch": 0.04, + "learning_rate": 7.053291536050157e-06, + "loss": 1.70393295, + "memory(GiB)": 58.46, + "step": 45, + "train_speed(iter/s)": 0.024212 + }, + { + "acc": 0.52223167, + "epoch": 0.04, + "learning_rate": 7.836990595611287e-06, + "loss": 1.74184074, + "memory(GiB)": 58.49, + "step": 50, + "train_speed(iter/s)": 0.024215 + }, + { + "acc": 0.55080438, + "epoch": 0.04, + "learning_rate": 8.620689655172414e-06, + "loss": 1.66886234, + "memory(GiB)": 58.49, + "step": 55, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.55040584, + "epoch": 0.05, + "learning_rate": 9.404388714733543e-06, + "loss": 1.62168198, + "memory(GiB)": 58.49, + "step": 60, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.56035452, + "epoch": 0.05, + "learning_rate": 1.0188087774294672e-05, + "loss": 1.61962128, + "memory(GiB)": 58.49, + "step": 65, + "train_speed(iter/s)": 0.024223 + }, + { + "acc": 0.5660913, + "epoch": 0.05, + "learning_rate": 1.09717868338558e-05, + "loss": 1.59430513, + "memory(GiB)": 58.49, + "step": 70, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.57812696, + "epoch": 0.06, + "learning_rate": 1.1755485893416929e-05, + "loss": 1.51112862, + "memory(GiB)": 58.49, + "step": 75, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.58022437, + "epoch": 0.06, + "learning_rate": 1.2539184952978056e-05, + "loss": 1.45745325, + "memory(GiB)": 58.49, + "step": 80, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.58660021, + "epoch": 0.07, + "learning_rate": 1.3322884012539186e-05, + "loss": 1.47154408, + "memory(GiB)": 58.49, + "step": 85, + "train_speed(iter/s)": 0.024254 + }, + { + "acc": 0.58845835, + "epoch": 0.07, + "learning_rate": 1.4106583072100313e-05, + "loss": 1.46421566, + "memory(GiB)": 60.96, + "step": 90, + "train_speed(iter/s)": 0.024265 + }, + { + "acc": 0.59027457, + "epoch": 0.07, + "learning_rate": 1.4890282131661442e-05, + "loss": 1.49178133, + "memory(GiB)": 60.96, + "step": 95, + "train_speed(iter/s)": 0.024276 + }, + { + "acc": 0.59797707, + "epoch": 0.08, + "learning_rate": 1.5673981191222574e-05, + "loss": 1.42499933, + "memory(GiB)": 60.96, + "step": 100, + "train_speed(iter/s)": 0.024276 + }, + { + "acc": 0.61143513, + "epoch": 0.08, + "learning_rate": 1.64576802507837e-05, + "loss": 1.35773869, + "memory(GiB)": 60.96, + "step": 105, + "train_speed(iter/s)": 0.024269 + }, + { + "acc": 0.60093813, + "epoch": 0.09, + "learning_rate": 1.7241379310344828e-05, + "loss": 1.42934065, + "memory(GiB)": 60.96, + "step": 110, + "train_speed(iter/s)": 0.024277 + }, + { + "acc": 0.60922484, + "epoch": 0.09, + "learning_rate": 1.8025078369905957e-05, + "loss": 1.38470116, + "memory(GiB)": 60.96, + "step": 115, + "train_speed(iter/s)": 0.024276 + }, + { + "acc": 0.59300284, + "epoch": 0.09, + "learning_rate": 1.8808777429467086e-05, + "loss": 1.40323668, + "memory(GiB)": 60.96, + "step": 120, + "train_speed(iter/s)": 0.02428 + }, + { + "acc": 0.59103971, + "epoch": 0.1, + "learning_rate": 1.9592476489028214e-05, + "loss": 1.42401762, + "memory(GiB)": 60.96, + "step": 125, + "train_speed(iter/s)": 0.024274 + }, + { + "acc": 0.59748178, + "epoch": 0.1, + "learning_rate": 2.0376175548589343e-05, + "loss": 1.43538437, + "memory(GiB)": 60.96, + "step": 130, + "train_speed(iter/s)": 0.024284 + }, + { + "acc": 0.59937601, + "epoch": 0.11, + "learning_rate": 2.1159874608150472e-05, + "loss": 1.37400723, + "memory(GiB)": 60.96, + "step": 135, + "train_speed(iter/s)": 0.024272 + }, + { + "acc": 0.60988913, + "epoch": 0.11, + "learning_rate": 2.19435736677116e-05, + "loss": 1.33568382, + "memory(GiB)": 60.96, + "step": 140, + "train_speed(iter/s)": 0.024269 + }, + { + "acc": 0.59887972, + "epoch": 0.11, + "learning_rate": 2.272727272727273e-05, + "loss": 1.37526703, + "memory(GiB)": 60.96, + "step": 145, + "train_speed(iter/s)": 0.024273 + }, + { + "acc": 0.60464563, + "epoch": 0.12, + "learning_rate": 2.3510971786833858e-05, + "loss": 1.38023567, + "memory(GiB)": 60.96, + "step": 150, + "train_speed(iter/s)": 0.02427 + }, + { + "acc": 0.62468381, + "epoch": 0.12, + "learning_rate": 2.4294670846394983e-05, + "loss": 1.31268759, + "memory(GiB)": 60.96, + "step": 155, + "train_speed(iter/s)": 0.024263 + }, + { + "acc": 0.615977, + "epoch": 0.13, + "learning_rate": 2.5078369905956112e-05, + "loss": 1.31516523, + "memory(GiB)": 60.96, + "step": 160, + "train_speed(iter/s)": 0.024272 + }, + { + "acc": 0.62613187, + "epoch": 0.13, + "learning_rate": 2.5862068965517244e-05, + "loss": 1.30936899, + "memory(GiB)": 60.96, + "step": 165, + "train_speed(iter/s)": 0.02427 + }, + { + "acc": 0.60663314, + "epoch": 0.13, + "learning_rate": 2.6645768025078373e-05, + "loss": 1.34656582, + "memory(GiB)": 60.96, + "step": 170, + "train_speed(iter/s)": 0.024273 + }, + { + "acc": 0.6247479, + "epoch": 0.14, + "learning_rate": 2.7429467084639498e-05, + "loss": 1.30112534, + "memory(GiB)": 60.96, + "step": 175, + "train_speed(iter/s)": 0.024272 + }, + { + "acc": 0.62302966, + "epoch": 0.14, + "learning_rate": 2.8213166144200627e-05, + "loss": 1.3161747, + "memory(GiB)": 63.69, + "step": 180, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.62723155, + "epoch": 0.14, + "learning_rate": 2.899686520376176e-05, + "loss": 1.28483057, + "memory(GiB)": 63.69, + "step": 185, + "train_speed(iter/s)": 0.02424 + }, + { + "acc": 0.61306233, + "epoch": 0.15, + "learning_rate": 2.9780564263322884e-05, + "loss": 1.3089962, + "memory(GiB)": 63.69, + "step": 190, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.60289783, + "epoch": 0.15, + "learning_rate": 3.056426332288401e-05, + "loss": 1.38051376, + "memory(GiB)": 63.69, + "step": 195, + "train_speed(iter/s)": 0.024228 + }, + { + "acc": 0.61080809, + "epoch": 0.16, + "learning_rate": 3.134796238244515e-05, + "loss": 1.34134455, + "memory(GiB)": 63.69, + "step": 200, + "train_speed(iter/s)": 0.024225 + }, + { + "acc": 0.62451038, + "epoch": 0.16, + "learning_rate": 3.213166144200627e-05, + "loss": 1.30470991, + "memory(GiB)": 63.69, + "step": 205, + "train_speed(iter/s)": 0.024222 + }, + { + "acc": 0.61880503, + "epoch": 0.16, + "learning_rate": 3.29153605015674e-05, + "loss": 1.31368427, + "memory(GiB)": 63.69, + "step": 210, + "train_speed(iter/s)": 0.02423 + }, + { + "acc": 0.62762785, + "epoch": 0.17, + "learning_rate": 3.369905956112853e-05, + "loss": 1.25870705, + "memory(GiB)": 63.69, + "step": 215, + "train_speed(iter/s)": 0.024226 + }, + { + "acc": 0.6347024, + "epoch": 0.17, + "learning_rate": 3.4482758620689657e-05, + "loss": 1.25454168, + "memory(GiB)": 63.69, + "step": 220, + "train_speed(iter/s)": 0.024227 + }, + { + "acc": 0.63634839, + "epoch": 0.18, + "learning_rate": 3.5266457680250785e-05, + "loss": 1.24715233, + "memory(GiB)": 63.69, + "step": 225, + "train_speed(iter/s)": 0.024228 + }, + { + "acc": 0.61799154, + "epoch": 0.18, + "learning_rate": 3.6050156739811914e-05, + "loss": 1.33669329, + "memory(GiB)": 63.69, + "step": 230, + "train_speed(iter/s)": 0.024225 + }, + { + "acc": 0.61765265, + "epoch": 0.18, + "learning_rate": 3.683385579937304e-05, + "loss": 1.29450636, + "memory(GiB)": 63.69, + "step": 235, + "train_speed(iter/s)": 0.024226 + }, + { + "acc": 0.62776251, + "epoch": 0.19, + "learning_rate": 3.761755485893417e-05, + "loss": 1.29685411, + "memory(GiB)": 63.69, + "step": 240, + "train_speed(iter/s)": 0.024226 + }, + { + "acc": 0.62579837, + "epoch": 0.19, + "learning_rate": 3.84012539184953e-05, + "loss": 1.29461927, + "memory(GiB)": 63.69, + "step": 245, + "train_speed(iter/s)": 0.024227 + }, + { + "acc": 0.62839761, + "epoch": 0.2, + "learning_rate": 3.918495297805643e-05, + "loss": 1.30260792, + "memory(GiB)": 63.69, + "step": 250, + "train_speed(iter/s)": 0.02423 + }, + { + "acc": 0.62507124, + "epoch": 0.2, + "learning_rate": 3.996865203761756e-05, + "loss": 1.28496952, + "memory(GiB)": 63.69, + "step": 255, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.62980595, + "epoch": 0.2, + "learning_rate": 4.0752351097178686e-05, + "loss": 1.29334612, + "memory(GiB)": 63.69, + "step": 260, + "train_speed(iter/s)": 0.024242 + }, + { + "acc": 0.63067842, + "epoch": 0.21, + "learning_rate": 4.1536050156739815e-05, + "loss": 1.30464935, + "memory(GiB)": 63.69, + "step": 265, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.637749, + "epoch": 0.21, + "learning_rate": 4.2319749216300944e-05, + "loss": 1.26530437, + "memory(GiB)": 63.69, + "step": 270, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.62997618, + "epoch": 0.22, + "learning_rate": 4.3103448275862066e-05, + "loss": 1.27895756, + "memory(GiB)": 63.69, + "step": 275, + "train_speed(iter/s)": 0.02424 + }, + { + "acc": 0.62871423, + "epoch": 0.22, + "learning_rate": 4.38871473354232e-05, + "loss": 1.24851456, + "memory(GiB)": 63.69, + "step": 280, + "train_speed(iter/s)": 0.024242 + }, + { + "acc": 0.62472744, + "epoch": 0.22, + "learning_rate": 4.467084639498433e-05, + "loss": 1.280616, + "memory(GiB)": 63.69, + "step": 285, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.6316843, + "epoch": 0.23, + "learning_rate": 4.545454545454546e-05, + "loss": 1.25226479, + "memory(GiB)": 66.18, + "step": 290, + "train_speed(iter/s)": 0.024242 + }, + { + "acc": 0.63331189, + "epoch": 0.23, + "learning_rate": 4.623824451410659e-05, + "loss": 1.24571609, + "memory(GiB)": 66.18, + "step": 295, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.6343452, + "epoch": 0.24, + "learning_rate": 4.7021943573667716e-05, + "loss": 1.28765945, + "memory(GiB)": 66.18, + "step": 300, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.61879272, + "epoch": 0.24, + "learning_rate": 4.7805642633228845e-05, + "loss": 1.32476501, + "memory(GiB)": 66.18, + "step": 305, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.64381948, + "epoch": 0.24, + "learning_rate": 4.858934169278997e-05, + "loss": 1.21850481, + "memory(GiB)": 66.18, + "step": 310, + "train_speed(iter/s)": 0.024237 + }, + { + "acc": 0.6378974, + "epoch": 0.25, + "learning_rate": 4.93730407523511e-05, + "loss": 1.23893595, + "memory(GiB)": 66.18, + "step": 315, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.64307256, + "epoch": 0.25, + "learning_rate": 5.0156739811912224e-05, + "loss": 1.21721163, + "memory(GiB)": 66.18, + "step": 320, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.63342023, + "epoch": 0.25, + "learning_rate": 5.094043887147336e-05, + "loss": 1.22513857, + "memory(GiB)": 66.18, + "step": 325, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.62956347, + "epoch": 0.26, + "learning_rate": 5.172413793103449e-05, + "loss": 1.273248, + "memory(GiB)": 66.18, + "step": 330, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.6396666, + "epoch": 0.26, + "learning_rate": 5.250783699059562e-05, + "loss": 1.2052968, + "memory(GiB)": 66.18, + "step": 335, + "train_speed(iter/s)": 0.024242 + }, + { + "acc": 0.63923178, + "epoch": 0.27, + "learning_rate": 5.3291536050156746e-05, + "loss": 1.19400892, + "memory(GiB)": 66.18, + "step": 340, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.64041128, + "epoch": 0.27, + "learning_rate": 5.407523510971787e-05, + "loss": 1.19951448, + "memory(GiB)": 66.18, + "step": 345, + "train_speed(iter/s)": 0.02424 + }, + { + "acc": 0.64085617, + "epoch": 0.27, + "learning_rate": 5.4858934169278996e-05, + "loss": 1.21989889, + "memory(GiB)": 66.18, + "step": 350, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.63642988, + "epoch": 0.28, + "learning_rate": 5.5642633228840125e-05, + "loss": 1.26623602, + "memory(GiB)": 66.18, + "step": 355, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.64299493, + "epoch": 0.28, + "learning_rate": 5.6426332288401254e-05, + "loss": 1.22799644, + "memory(GiB)": 66.18, + "step": 360, + "train_speed(iter/s)": 0.024246 + }, + { + "acc": 0.62601166, + "epoch": 0.29, + "learning_rate": 5.721003134796239e-05, + "loss": 1.27594986, + "memory(GiB)": 66.18, + "step": 365, + "train_speed(iter/s)": 0.024248 + }, + { + "acc": 0.63284698, + "epoch": 0.29, + "learning_rate": 5.799373040752352e-05, + "loss": 1.2637042, + "memory(GiB)": 66.18, + "step": 370, + "train_speed(iter/s)": 0.02425 + }, + { + "acc": 0.64749308, + "epoch": 0.29, + "learning_rate": 5.877742946708465e-05, + "loss": 1.17957096, + "memory(GiB)": 68.68, + "step": 375, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.63170052, + "epoch": 0.3, + "learning_rate": 5.956112852664577e-05, + "loss": 1.26501703, + "memory(GiB)": 68.68, + "step": 380, + "train_speed(iter/s)": 0.024248 + }, + { + "acc": 0.64609647, + "epoch": 0.3, + "learning_rate": 6.03448275862069e-05, + "loss": 1.1953764, + "memory(GiB)": 68.68, + "step": 385, + "train_speed(iter/s)": 0.024246 + }, + { + "acc": 0.63454189, + "epoch": 0.31, + "learning_rate": 6.112852664576803e-05, + "loss": 1.22050667, + "memory(GiB)": 68.68, + "step": 390, + "train_speed(iter/s)": 0.024249 + }, + { + "acc": 0.6379528, + "epoch": 0.31, + "learning_rate": 6.191222570532915e-05, + "loss": 1.21711063, + "memory(GiB)": 68.68, + "step": 395, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.64480844, + "epoch": 0.31, + "learning_rate": 6.26959247648903e-05, + "loss": 1.18095789, + "memory(GiB)": 68.68, + "step": 400, + "train_speed(iter/s)": 0.024249 + }, + { + "acc": 0.64001446, + "epoch": 0.32, + "learning_rate": 6.347962382445141e-05, + "loss": 1.20016241, + "memory(GiB)": 68.68, + "step": 405, + "train_speed(iter/s)": 0.02425 + }, + { + "acc": 0.64171805, + "epoch": 0.32, + "learning_rate": 6.426332288401254e-05, + "loss": 1.23874407, + "memory(GiB)": 68.68, + "step": 410, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.62769017, + "epoch": 0.33, + "learning_rate": 6.504702194357367e-05, + "loss": 1.28347731, + "memory(GiB)": 68.68, + "step": 415, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.6374104, + "epoch": 0.33, + "learning_rate": 6.58307210031348e-05, + "loss": 1.20551748, + "memory(GiB)": 68.68, + "step": 420, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.63077922, + "epoch": 0.33, + "learning_rate": 6.661442006269593e-05, + "loss": 1.23985853, + "memory(GiB)": 68.68, + "step": 425, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.63933249, + "epoch": 0.34, + "learning_rate": 6.739811912225706e-05, + "loss": 1.25593414, + "memory(GiB)": 68.68, + "step": 430, + "train_speed(iter/s)": 0.024248 + }, + { + "acc": 0.63769989, + "epoch": 0.34, + "learning_rate": 6.818181818181818e-05, + "loss": 1.22561283, + "memory(GiB)": 68.68, + "step": 435, + "train_speed(iter/s)": 0.024248 + }, + { + "acc": 0.62949352, + "epoch": 0.34, + "learning_rate": 6.896551724137931e-05, + "loss": 1.26371164, + "memory(GiB)": 68.68, + "step": 440, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.64247766, + "epoch": 0.35, + "learning_rate": 6.974921630094044e-05, + "loss": 1.20132828, + "memory(GiB)": 68.68, + "step": 445, + "train_speed(iter/s)": 0.024255 + }, + { + "acc": 0.63743186, + "epoch": 0.35, + "learning_rate": 7.053291536050157e-05, + "loss": 1.25252228, + "memory(GiB)": 68.68, + "step": 450, + "train_speed(iter/s)": 0.024255 + }, + { + "acc": 0.63471522, + "epoch": 0.36, + "learning_rate": 7.13166144200627e-05, + "loss": 1.21878319, + "memory(GiB)": 68.68, + "step": 455, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.64033532, + "epoch": 0.36, + "learning_rate": 7.210031347962383e-05, + "loss": 1.23047905, + "memory(GiB)": 68.68, + "step": 460, + "train_speed(iter/s)": 0.02425 + }, + { + "acc": 0.62159195, + "epoch": 0.36, + "learning_rate": 7.288401253918496e-05, + "loss": 1.26657543, + "memory(GiB)": 68.68, + "step": 465, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.63586564, + "epoch": 0.37, + "learning_rate": 7.366771159874609e-05, + "loss": 1.22125769, + "memory(GiB)": 68.68, + "step": 470, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.62739558, + "epoch": 0.37, + "learning_rate": 7.445141065830721e-05, + "loss": 1.24667158, + "memory(GiB)": 68.68, + "step": 475, + "train_speed(iter/s)": 0.02425 + }, + { + "acc": 0.64560175, + "epoch": 0.38, + "learning_rate": 7.523510971786834e-05, + "loss": 1.19969473, + "memory(GiB)": 68.68, + "step": 480, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.64611869, + "epoch": 0.38, + "learning_rate": 7.601880877742947e-05, + "loss": 1.18089733, + "memory(GiB)": 68.68, + "step": 485, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.6526392, + "epoch": 0.38, + "learning_rate": 7.68025078369906e-05, + "loss": 1.18621178, + "memory(GiB)": 68.68, + "step": 490, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.64487033, + "epoch": 0.39, + "learning_rate": 7.758620689655173e-05, + "loss": 1.22172861, + "memory(GiB)": 68.68, + "step": 495, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.64801369, + "epoch": 0.39, + "learning_rate": 7.836990595611286e-05, + "loss": 1.1807312, + "memory(GiB)": 68.68, + "step": 500, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.65408354, + "epoch": 0.4, + "learning_rate": 7.915360501567399e-05, + "loss": 1.17569618, + "memory(GiB)": 68.68, + "step": 505, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.64917159, + "epoch": 0.4, + "learning_rate": 7.993730407523512e-05, + "loss": 1.18598318, + "memory(GiB)": 68.68, + "step": 510, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.6524426, + "epoch": 0.4, + "learning_rate": 8.072100313479624e-05, + "loss": 1.19354515, + "memory(GiB)": 68.68, + "step": 515, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.63392072, + "epoch": 0.41, + "learning_rate": 8.150470219435737e-05, + "loss": 1.25341883, + "memory(GiB)": 68.68, + "step": 520, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.64778457, + "epoch": 0.41, + "learning_rate": 8.22884012539185e-05, + "loss": 1.18014956, + "memory(GiB)": 68.68, + "step": 525, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.65192814, + "epoch": 0.42, + "learning_rate": 8.307210031347963e-05, + "loss": 1.14602833, + "memory(GiB)": 68.68, + "step": 530, + "train_speed(iter/s)": 0.024252 + }, + { + "acc": 0.6464828, + "epoch": 0.42, + "learning_rate": 8.385579937304076e-05, + "loss": 1.20210142, + "memory(GiB)": 68.68, + "step": 535, + "train_speed(iter/s)": 0.024249 + }, + { + "acc": 0.64848366, + "epoch": 0.42, + "learning_rate": 8.463949843260189e-05, + "loss": 1.16304598, + "memory(GiB)": 68.68, + "step": 540, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.65109825, + "epoch": 0.43, + "learning_rate": 8.542319749216302e-05, + "loss": 1.17073431, + "memory(GiB)": 68.68, + "step": 545, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.64547157, + "epoch": 0.43, + "learning_rate": 8.620689655172413e-05, + "loss": 1.21380081, + "memory(GiB)": 68.68, + "step": 550, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.63410115, + "epoch": 0.43, + "learning_rate": 8.699059561128527e-05, + "loss": 1.25124855, + "memory(GiB)": 68.68, + "step": 555, + "train_speed(iter/s)": 0.024246 + }, + { + "acc": 0.63803411, + "epoch": 0.44, + "learning_rate": 8.77742946708464e-05, + "loss": 1.2044651, + "memory(GiB)": 68.68, + "step": 560, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.64598598, + "epoch": 0.44, + "learning_rate": 8.855799373040753e-05, + "loss": 1.18607101, + "memory(GiB)": 68.68, + "step": 565, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.63589487, + "epoch": 0.45, + "learning_rate": 8.934169278996866e-05, + "loss": 1.21959095, + "memory(GiB)": 68.68, + "step": 570, + "train_speed(iter/s)": 0.024249 + }, + { + "acc": 0.66258736, + "epoch": 0.45, + "learning_rate": 9.012539184952979e-05, + "loss": 1.14576664, + "memory(GiB)": 68.68, + "step": 575, + "train_speed(iter/s)": 0.024249 + }, + { + "acc": 0.644449, + "epoch": 0.45, + "learning_rate": 9.090909090909092e-05, + "loss": 1.17768698, + "memory(GiB)": 68.68, + "step": 580, + "train_speed(iter/s)": 0.02425 + }, + { + "acc": 0.63850074, + "epoch": 0.46, + "learning_rate": 9.169278996865203e-05, + "loss": 1.21261606, + "memory(GiB)": 68.68, + "step": 585, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.66103854, + "epoch": 0.46, + "learning_rate": 9.247648902821317e-05, + "loss": 1.14370327, + "memory(GiB)": 68.68, + "step": 590, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.64929209, + "epoch": 0.47, + "learning_rate": 9.32601880877743e-05, + "loss": 1.17919035, + "memory(GiB)": 68.68, + "step": 595, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.6462472, + "epoch": 0.47, + "learning_rate": 9.404388714733543e-05, + "loss": 1.19705715, + "memory(GiB)": 68.68, + "step": 600, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.65177841, + "epoch": 0.47, + "learning_rate": 9.482758620689656e-05, + "loss": 1.17879667, + "memory(GiB)": 68.68, + "step": 605, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.63742218, + "epoch": 0.48, + "learning_rate": 9.561128526645769e-05, + "loss": 1.22292814, + "memory(GiB)": 68.68, + "step": 610, + "train_speed(iter/s)": 0.024257 + }, + { + "acc": 0.6366725, + "epoch": 0.48, + "learning_rate": 9.63949843260188e-05, + "loss": 1.21893911, + "memory(GiB)": 68.68, + "step": 615, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.6517189, + "epoch": 0.49, + "learning_rate": 9.717868338557993e-05, + "loss": 1.16371479, + "memory(GiB)": 68.68, + "step": 620, + "train_speed(iter/s)": 0.024257 + }, + { + "acc": 0.640413, + "epoch": 0.49, + "learning_rate": 9.796238244514106e-05, + "loss": 1.21149368, + "memory(GiB)": 68.68, + "step": 625, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.64837985, + "epoch": 0.49, + "learning_rate": 9.87460815047022e-05, + "loss": 1.18780231, + "memory(GiB)": 68.68, + "step": 630, + "train_speed(iter/s)": 0.024257 + }, + { + "acc": 0.64622922, + "epoch": 0.5, + "learning_rate": 9.952978056426333e-05, + "loss": 1.19707012, + "memory(GiB)": 68.68, + "step": 635, + "train_speed(iter/s)": 0.024259 + }, + { + "acc": 0.63969393, + "epoch": 0.5, + "learning_rate": 9.999999327227841e-05, + "loss": 1.19009142, + "memory(GiB)": 68.68, + "step": 640, + "train_speed(iter/s)": 0.02426 + }, + { + "acc": 0.66441765, + "epoch": 0.51, + "learning_rate": 9.999991758543125e-05, + "loss": 1.15019789, + "memory(GiB)": 68.68, + "step": 645, + "train_speed(iter/s)": 0.02426 + }, + { + "acc": 0.64071684, + "epoch": 0.51, + "learning_rate": 9.999975780221265e-05, + "loss": 1.21033449, + "memory(GiB)": 68.68, + "step": 650, + "train_speed(iter/s)": 0.024262 + }, + { + "acc": 0.65804129, + "epoch": 0.51, + "learning_rate": 9.999951392289139e-05, + "loss": 1.14957666, + "memory(GiB)": 68.68, + "step": 655, + "train_speed(iter/s)": 0.024264 + }, + { + "acc": 0.64765239, + "epoch": 0.52, + "learning_rate": 9.999918594787761e-05, + "loss": 1.19038534, + "memory(GiB)": 68.68, + "step": 660, + "train_speed(iter/s)": 0.024263 + }, + { + "acc": 0.64099531, + "epoch": 0.52, + "learning_rate": 9.999877387772296e-05, + "loss": 1.21312761, + "memory(GiB)": 68.68, + "step": 665, + "train_speed(iter/s)": 0.024263 + }, + { + "acc": 0.63610272, + "epoch": 0.53, + "learning_rate": 9.999827771312054e-05, + "loss": 1.20392857, + "memory(GiB)": 68.68, + "step": 670, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.65267296, + "epoch": 0.53, + "learning_rate": 9.999769745490481e-05, + "loss": 1.17815151, + "memory(GiB)": 68.68, + "step": 675, + "train_speed(iter/s)": 0.02426 + }, + { + "acc": 0.64978647, + "epoch": 0.53, + "learning_rate": 9.999703310405178e-05, + "loss": 1.17260685, + "memory(GiB)": 68.68, + "step": 680, + "train_speed(iter/s)": 0.024259 + }, + { + "acc": 0.64660273, + "epoch": 0.54, + "learning_rate": 9.999628466167881e-05, + "loss": 1.1884903, + "memory(GiB)": 68.68, + "step": 685, + "train_speed(iter/s)": 0.024255 + }, + { + "acc": 0.64088135, + "epoch": 0.54, + "learning_rate": 9.999545212904473e-05, + "loss": 1.21543169, + "memory(GiB)": 71.19, + "step": 690, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.64940615, + "epoch": 0.54, + "learning_rate": 9.999453550754981e-05, + "loss": 1.16802692, + "memory(GiB)": 71.19, + "step": 695, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.64227314, + "epoch": 0.55, + "learning_rate": 9.999353479873575e-05, + "loss": 1.19985647, + "memory(GiB)": 71.19, + "step": 700, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.65743675, + "epoch": 0.55, + "learning_rate": 9.999245000428563e-05, + "loss": 1.13964968, + "memory(GiB)": 71.19, + "step": 705, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.65150938, + "epoch": 0.56, + "learning_rate": 9.999128112602406e-05, + "loss": 1.13540983, + "memory(GiB)": 71.19, + "step": 710, + "train_speed(iter/s)": 0.024254 + }, + { + "acc": 0.65529537, + "epoch": 0.56, + "learning_rate": 9.999002816591696e-05, + "loss": 1.148839, + "memory(GiB)": 71.19, + "step": 715, + "train_speed(iter/s)": 0.024255 + }, + { + "acc": 0.65492792, + "epoch": 0.56, + "learning_rate": 9.998869112607177e-05, + "loss": 1.16663084, + "memory(GiB)": 71.19, + "step": 720, + "train_speed(iter/s)": 0.024255 + }, + { + "acc": 0.64847426, + "epoch": 0.57, + "learning_rate": 9.998727000873724e-05, + "loss": 1.19009991, + "memory(GiB)": 71.19, + "step": 725, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.65983605, + "epoch": 0.57, + "learning_rate": 9.998576481630364e-05, + "loss": 1.17517796, + "memory(GiB)": 71.19, + "step": 730, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.66514916, + "epoch": 0.58, + "learning_rate": 9.998417555130258e-05, + "loss": 1.11018133, + "memory(GiB)": 71.19, + "step": 735, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.65130382, + "epoch": 0.58, + "learning_rate": 9.998250221640708e-05, + "loss": 1.18467026, + "memory(GiB)": 71.19, + "step": 740, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.64631677, + "epoch": 0.58, + "learning_rate": 9.99807448144316e-05, + "loss": 1.16014452, + "memory(GiB)": 71.19, + "step": 745, + "train_speed(iter/s)": 0.024258 + }, + { + "acc": 0.64330449, + "epoch": 0.59, + "learning_rate": 9.997890334833194e-05, + "loss": 1.16912346, + "memory(GiB)": 71.19, + "step": 750, + "train_speed(iter/s)": 0.02426 + }, + { + "acc": 0.64438591, + "epoch": 0.59, + "learning_rate": 9.997697782120535e-05, + "loss": 1.18001528, + "memory(GiB)": 71.19, + "step": 755, + "train_speed(iter/s)": 0.024262 + }, + { + "acc": 0.64493594, + "epoch": 0.6, + "learning_rate": 9.997496823629038e-05, + "loss": 1.18717165, + "memory(GiB)": 71.19, + "step": 760, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.64458404, + "epoch": 0.6, + "learning_rate": 9.997287459696707e-05, + "loss": 1.2015686, + "memory(GiB)": 71.19, + "step": 765, + "train_speed(iter/s)": 0.024262 + }, + { + "acc": 0.65334735, + "epoch": 0.6, + "learning_rate": 9.997069690675673e-05, + "loss": 1.13985786, + "memory(GiB)": 71.19, + "step": 770, + "train_speed(iter/s)": 0.024263 + }, + { + "acc": 0.65646448, + "epoch": 0.61, + "learning_rate": 9.996843516932212e-05, + "loss": 1.14643917, + "memory(GiB)": 71.19, + "step": 775, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.63831444, + "epoch": 0.61, + "learning_rate": 9.99660893884673e-05, + "loss": 1.20351677, + "memory(GiB)": 71.19, + "step": 780, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.64026852, + "epoch": 0.62, + "learning_rate": 9.996365956813771e-05, + "loss": 1.21742287, + "memory(GiB)": 71.19, + "step": 785, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.65602589, + "epoch": 0.62, + "learning_rate": 9.996114571242014e-05, + "loss": 1.1601553, + "memory(GiB)": 71.19, + "step": 790, + "train_speed(iter/s)": 0.024259 + }, + { + "acc": 0.64420905, + "epoch": 0.62, + "learning_rate": 9.995854782554275e-05, + "loss": 1.19600077, + "memory(GiB)": 71.19, + "step": 795, + "train_speed(iter/s)": 0.02426 + }, + { + "acc": 0.64605112, + "epoch": 0.63, + "learning_rate": 9.995586591187496e-05, + "loss": 1.18536654, + "memory(GiB)": 71.19, + "step": 800, + "train_speed(iter/s)": 0.02426 + }, + { + "acc": 0.63376851, + "epoch": 0.63, + "learning_rate": 9.995309997592757e-05, + "loss": 1.2327877, + "memory(GiB)": 71.19, + "step": 805, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.63126373, + "epoch": 0.63, + "learning_rate": 9.995025002235272e-05, + "loss": 1.24970627, + "memory(GiB)": 71.19, + "step": 810, + "train_speed(iter/s)": 0.024261 + }, + { + "acc": 0.64044433, + "epoch": 0.64, + "learning_rate": 9.994731605594381e-05, + "loss": 1.19621964, + "memory(GiB)": 71.19, + "step": 815, + "train_speed(iter/s)": 0.024259 + }, + { + "acc": 0.64977989, + "epoch": 0.64, + "learning_rate": 9.994429808163556e-05, + "loss": 1.17283697, + "memory(GiB)": 71.19, + "step": 820, + "train_speed(iter/s)": 0.024256 + }, + { + "acc": 0.64557714, + "epoch": 0.65, + "learning_rate": 9.994119610450401e-05, + "loss": 1.18671465, + "memory(GiB)": 71.19, + "step": 825, + "train_speed(iter/s)": 0.024257 + }, + { + "acc": 0.66244955, + "epoch": 0.65, + "learning_rate": 9.993801012976647e-05, + "loss": 1.1419055, + "memory(GiB)": 71.19, + "step": 830, + "train_speed(iter/s)": 0.024255 + }, + { + "acc": 0.66743221, + "epoch": 0.65, + "learning_rate": 9.99347401627815e-05, + "loss": 1.10382252, + "memory(GiB)": 71.19, + "step": 835, + "train_speed(iter/s)": 0.024253 + }, + { + "acc": 0.65264039, + "epoch": 0.66, + "learning_rate": 9.993138620904901e-05, + "loss": 1.14855709, + "memory(GiB)": 71.19, + "step": 840, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.65064726, + "epoch": 0.66, + "learning_rate": 9.992794827421004e-05, + "loss": 1.15438948, + "memory(GiB)": 71.19, + "step": 845, + "train_speed(iter/s)": 0.024251 + }, + { + "acc": 0.64738302, + "epoch": 0.67, + "learning_rate": 9.992442636404701e-05, + "loss": 1.18792486, + "memory(GiB)": 71.19, + "step": 850, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.63004446, + "epoch": 0.67, + "learning_rate": 9.992082048448353e-05, + "loss": 1.24233475, + "memory(GiB)": 71.19, + "step": 855, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65914078, + "epoch": 0.67, + "learning_rate": 9.991713064158442e-05, + "loss": 1.13226604, + "memory(GiB)": 71.19, + "step": 860, + "train_speed(iter/s)": 0.024242 + }, + { + "acc": 0.65967417, + "epoch": 0.68, + "learning_rate": 9.991335684155574e-05, + "loss": 1.1242136, + "memory(GiB)": 71.19, + "step": 865, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.64683952, + "epoch": 0.68, + "learning_rate": 9.990949909074476e-05, + "loss": 1.14824209, + "memory(GiB)": 71.19, + "step": 870, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.64689598, + "epoch": 0.69, + "learning_rate": 9.990555739563994e-05, + "loss": 1.18441563, + "memory(GiB)": 71.19, + "step": 875, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.64474206, + "epoch": 0.69, + "learning_rate": 9.990153176287094e-05, + "loss": 1.17308598, + "memory(GiB)": 71.19, + "step": 880, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.64102311, + "epoch": 0.69, + "learning_rate": 9.989742219920861e-05, + "loss": 1.1535264, + "memory(GiB)": 71.19, + "step": 885, + "train_speed(iter/s)": 0.024246 + }, + { + "acc": 0.63249907, + "epoch": 0.7, + "learning_rate": 9.989322871156492e-05, + "loss": 1.24110155, + "memory(GiB)": 71.19, + "step": 890, + "train_speed(iter/s)": 0.024246 + }, + { + "acc": 0.65820117, + "epoch": 0.7, + "learning_rate": 9.988895130699305e-05, + "loss": 1.14757719, + "memory(GiB)": 71.19, + "step": 895, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.6568809, + "epoch": 0.71, + "learning_rate": 9.988458999268728e-05, + "loss": 1.1805316, + "memory(GiB)": 71.19, + "step": 900, + "train_speed(iter/s)": 0.024247 + }, + { + "acc": 0.65315137, + "epoch": 0.71, + "learning_rate": 9.988014477598304e-05, + "loss": 1.18291893, + "memory(GiB)": 71.19, + "step": 905, + "train_speed(iter/s)": 0.024248 + }, + { + "acc": 0.65974207, + "epoch": 0.71, + "learning_rate": 9.987561566435689e-05, + "loss": 1.11334858, + "memory(GiB)": 71.19, + "step": 910, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.66105995, + "epoch": 0.72, + "learning_rate": 9.987100266542644e-05, + "loss": 1.10593357, + "memory(GiB)": 71.19, + "step": 915, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65526924, + "epoch": 0.72, + "learning_rate": 9.986630578695047e-05, + "loss": 1.139046, + "memory(GiB)": 71.19, + "step": 920, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.64982562, + "epoch": 0.72, + "learning_rate": 9.986152503682879e-05, + "loss": 1.15638342, + "memory(GiB)": 71.19, + "step": 925, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.65965147, + "epoch": 0.73, + "learning_rate": 9.985666042310229e-05, + "loss": 1.10007477, + "memory(GiB)": 71.19, + "step": 930, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.64175363, + "epoch": 0.73, + "learning_rate": 9.98517119539529e-05, + "loss": 1.2035121, + "memory(GiB)": 71.19, + "step": 935, + "train_speed(iter/s)": 0.024246 + }, + { + "acc": 0.67316985, + "epoch": 0.74, + "learning_rate": 9.984667963770361e-05, + "loss": 1.0980998, + "memory(GiB)": 71.19, + "step": 940, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.65934963, + "epoch": 0.74, + "learning_rate": 9.984156348281842e-05, + "loss": 1.15233898, + "memory(GiB)": 71.19, + "step": 945, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65463395, + "epoch": 0.74, + "learning_rate": 9.983636349790235e-05, + "loss": 1.17552748, + "memory(GiB)": 71.19, + "step": 950, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.64568629, + "epoch": 0.75, + "learning_rate": 9.983107969170141e-05, + "loss": 1.18645792, + "memory(GiB)": 71.19, + "step": 955, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.64675536, + "epoch": 0.75, + "learning_rate": 9.982571207310259e-05, + "loss": 1.16107082, + "memory(GiB)": 71.19, + "step": 960, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.66028018, + "epoch": 0.76, + "learning_rate": 9.982026065113386e-05, + "loss": 1.14361153, + "memory(GiB)": 71.19, + "step": 965, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65468411, + "epoch": 0.76, + "learning_rate": 9.981472543496412e-05, + "loss": 1.14505243, + "memory(GiB)": 71.19, + "step": 970, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65591521, + "epoch": 0.76, + "learning_rate": 9.980910643390321e-05, + "loss": 1.12250948, + "memory(GiB)": 71.19, + "step": 975, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.66190786, + "epoch": 0.77, + "learning_rate": 9.980340365740193e-05, + "loss": 1.12542, + "memory(GiB)": 71.19, + "step": 980, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65410662, + "epoch": 0.77, + "learning_rate": 9.979761711505191e-05, + "loss": 1.14729557, + "memory(GiB)": 71.19, + "step": 985, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.66674676, + "epoch": 0.78, + "learning_rate": 9.979174681658574e-05, + "loss": 1.10079012, + "memory(GiB)": 71.19, + "step": 990, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.65360141, + "epoch": 0.78, + "learning_rate": 9.978579277187684e-05, + "loss": 1.16522446, + "memory(GiB)": 71.19, + "step": 995, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.66134276, + "epoch": 0.78, + "learning_rate": 9.97797549909395e-05, + "loss": 1.11977797, + "memory(GiB)": 71.19, + "step": 1000, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65579562, + "epoch": 0.79, + "learning_rate": 9.977363348392886e-05, + "loss": 1.15444326, + "memory(GiB)": 71.19, + "step": 1005, + "train_speed(iter/s)": 0.024244 + }, + { + "acc": 0.66116667, + "epoch": 0.79, + "learning_rate": 9.976742826114083e-05, + "loss": 1.12424622, + "memory(GiB)": 71.19, + "step": 1010, + "train_speed(iter/s)": 0.024245 + }, + { + "acc": 0.6683176, + "epoch": 0.8, + "learning_rate": 9.97611393330122e-05, + "loss": 1.11872005, + "memory(GiB)": 71.19, + "step": 1015, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65369892, + "epoch": 0.8, + "learning_rate": 9.975476671012049e-05, + "loss": 1.1429491, + "memory(GiB)": 71.19, + "step": 1020, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.64377575, + "epoch": 0.8, + "learning_rate": 9.974831040318403e-05, + "loss": 1.19284143, + "memory(GiB)": 71.19, + "step": 1025, + "train_speed(iter/s)": 0.024243 + }, + { + "acc": 0.65243697, + "epoch": 0.81, + "learning_rate": 9.974177042306183e-05, + "loss": 1.157862, + "memory(GiB)": 71.19, + "step": 1030, + "train_speed(iter/s)": 0.02424 + }, + { + "acc": 0.64958396, + "epoch": 0.81, + "learning_rate": 9.973514678075372e-05, + "loss": 1.18240442, + "memory(GiB)": 71.19, + "step": 1035, + "train_speed(iter/s)": 0.02424 + }, + { + "acc": 0.6682889, + "epoch": 0.82, + "learning_rate": 9.972843948740019e-05, + "loss": 1.11338902, + "memory(GiB)": 71.19, + "step": 1040, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.65257483, + "epoch": 0.82, + "learning_rate": 9.972164855428244e-05, + "loss": 1.13974934, + "memory(GiB)": 71.19, + "step": 1045, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.6569396, + "epoch": 0.82, + "learning_rate": 9.971477399282236e-05, + "loss": 1.13563824, + "memory(GiB)": 71.19, + "step": 1050, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.66365395, + "epoch": 0.83, + "learning_rate": 9.970781581458246e-05, + "loss": 1.13148432, + "memory(GiB)": 71.19, + "step": 1055, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.66655474, + "epoch": 0.83, + "learning_rate": 9.970077403126592e-05, + "loss": 1.12719841, + "memory(GiB)": 71.19, + "step": 1060, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.65451961, + "epoch": 0.83, + "learning_rate": 9.969364865471654e-05, + "loss": 1.17055464, + "memory(GiB)": 71.19, + "step": 1065, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.66282625, + "epoch": 0.84, + "learning_rate": 9.968643969691868e-05, + "loss": 1.11539078, + "memory(GiB)": 71.19, + "step": 1070, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.66324406, + "epoch": 0.84, + "learning_rate": 9.96791471699973e-05, + "loss": 1.12122641, + "memory(GiB)": 71.19, + "step": 1075, + "train_speed(iter/s)": 0.024237 + }, + { + "acc": 0.66220465, + "epoch": 0.85, + "learning_rate": 9.967177108621798e-05, + "loss": 1.1304287, + "memory(GiB)": 71.19, + "step": 1080, + "train_speed(iter/s)": 0.024237 + }, + { + "acc": 0.65334153, + "epoch": 0.85, + "learning_rate": 9.966431145798672e-05, + "loss": 1.15781803, + "memory(GiB)": 71.19, + "step": 1085, + "train_speed(iter/s)": 0.024235 + }, + { + "acc": 0.6635685, + "epoch": 0.85, + "learning_rate": 9.965676829785011e-05, + "loss": 1.10384178, + "memory(GiB)": 71.19, + "step": 1090, + "train_speed(iter/s)": 0.024235 + }, + { + "acc": 0.66880417, + "epoch": 0.86, + "learning_rate": 9.964914161849522e-05, + "loss": 1.1105998, + "memory(GiB)": 71.19, + "step": 1095, + "train_speed(iter/s)": 0.024235 + }, + { + "acc": 0.65008655, + "epoch": 0.86, + "learning_rate": 9.96414314327496e-05, + "loss": 1.15002518, + "memory(GiB)": 71.19, + "step": 1100, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.660391, + "epoch": 0.87, + "learning_rate": 9.963363775358123e-05, + "loss": 1.13650742, + "memory(GiB)": 71.19, + "step": 1105, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.65939445, + "epoch": 0.87, + "learning_rate": 9.962576059409854e-05, + "loss": 1.14001417, + "memory(GiB)": 71.19, + "step": 1110, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.65705018, + "epoch": 0.87, + "learning_rate": 9.961779996755036e-05, + "loss": 1.13998594, + "memory(GiB)": 71.19, + "step": 1115, + "train_speed(iter/s)": 0.024237 + }, + { + "acc": 0.65304127, + "epoch": 0.88, + "learning_rate": 9.96097558873259e-05, + "loss": 1.13927307, + "memory(GiB)": 71.19, + "step": 1120, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.67214541, + "epoch": 0.88, + "learning_rate": 9.960162836695478e-05, + "loss": 1.10992355, + "memory(GiB)": 71.19, + "step": 1125, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.64525504, + "epoch": 0.89, + "learning_rate": 9.959341742010688e-05, + "loss": 1.18042841, + "memory(GiB)": 71.19, + "step": 1130, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.6544466, + "epoch": 0.89, + "learning_rate": 9.958512306059247e-05, + "loss": 1.14733744, + "memory(GiB)": 71.19, + "step": 1135, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.65201364, + "epoch": 0.89, + "learning_rate": 9.957674530236205e-05, + "loss": 1.13061304, + "memory(GiB)": 71.19, + "step": 1140, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.66318994, + "epoch": 0.9, + "learning_rate": 9.956828415950645e-05, + "loss": 1.1062932, + "memory(GiB)": 71.19, + "step": 1145, + "train_speed(iter/s)": 0.02424 + }, + { + "acc": 0.65637937, + "epoch": 0.9, + "learning_rate": 9.955973964625672e-05, + "loss": 1.14308052, + "memory(GiB)": 71.19, + "step": 1150, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.65574675, + "epoch": 0.91, + "learning_rate": 9.955111177698412e-05, + "loss": 1.14501066, + "memory(GiB)": 71.19, + "step": 1155, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.6562243, + "epoch": 0.91, + "learning_rate": 9.954240056620014e-05, + "loss": 1.16590223, + "memory(GiB)": 71.19, + "step": 1160, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.66819267, + "epoch": 0.91, + "learning_rate": 9.953360602855641e-05, + "loss": 1.10895786, + "memory(GiB)": 71.19, + "step": 1165, + "train_speed(iter/s)": 0.024241 + }, + { + "acc": 0.66646504, + "epoch": 0.92, + "learning_rate": 9.952472817884476e-05, + "loss": 1.13673782, + "memory(GiB)": 71.19, + "step": 1170, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.67009692, + "epoch": 0.92, + "learning_rate": 9.951576703199708e-05, + "loss": 1.0828022, + "memory(GiB)": 71.19, + "step": 1175, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.64940991, + "epoch": 0.92, + "learning_rate": 9.95067226030854e-05, + "loss": 1.18824711, + "memory(GiB)": 71.19, + "step": 1180, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.67086515, + "epoch": 0.93, + "learning_rate": 9.949759490732185e-05, + "loss": 1.11339579, + "memory(GiB)": 71.19, + "step": 1185, + "train_speed(iter/s)": 0.024238 + }, + { + "acc": 0.65760446, + "epoch": 0.93, + "learning_rate": 9.948838396005854e-05, + "loss": 1.1283742, + "memory(GiB)": 71.19, + "step": 1190, + "train_speed(iter/s)": 0.024239 + }, + { + "acc": 0.66921711, + "epoch": 0.94, + "learning_rate": 9.947908977678766e-05, + "loss": 1.09572687, + "memory(GiB)": 71.19, + "step": 1195, + "train_speed(iter/s)": 0.024237 + }, + { + "acc": 0.65912962, + "epoch": 0.94, + "learning_rate": 9.946971237314136e-05, + "loss": 1.14600134, + "memory(GiB)": 71.19, + "step": 1200, + "train_speed(iter/s)": 0.024236 + }, + { + "acc": 0.66213255, + "epoch": 0.94, + "learning_rate": 9.94602517648918e-05, + "loss": 1.12797499, + "memory(GiB)": 71.19, + "step": 1205, + "train_speed(iter/s)": 0.024234 + }, + { + "acc": 0.64736218, + "epoch": 0.95, + "learning_rate": 9.945070796795105e-05, + "loss": 1.15010433, + "memory(GiB)": 71.19, + "step": 1210, + "train_speed(iter/s)": 0.024234 + }, + { + "acc": 0.65648313, + "epoch": 0.95, + "learning_rate": 9.94410809983711e-05, + "loss": 1.13810177, + "memory(GiB)": 71.19, + "step": 1215, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.65412345, + "epoch": 0.96, + "learning_rate": 9.943137087234385e-05, + "loss": 1.14729261, + "memory(GiB)": 71.19, + "step": 1220, + "train_speed(iter/s)": 0.024231 + }, + { + "acc": 0.65371003, + "epoch": 0.96, + "learning_rate": 9.942157760620108e-05, + "loss": 1.15231876, + "memory(GiB)": 71.19, + "step": 1225, + "train_speed(iter/s)": 0.024231 + }, + { + "acc": 0.6497436, + "epoch": 0.96, + "learning_rate": 9.941170121641435e-05, + "loss": 1.15340414, + "memory(GiB)": 71.19, + "step": 1230, + "train_speed(iter/s)": 0.024231 + }, + { + "acc": 0.66141253, + "epoch": 0.97, + "learning_rate": 9.940174171959504e-05, + "loss": 1.12856808, + "memory(GiB)": 71.19, + "step": 1235, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.66497359, + "epoch": 0.97, + "learning_rate": 9.939169913249438e-05, + "loss": 1.12229996, + "memory(GiB)": 71.19, + "step": 1240, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.67611599, + "epoch": 0.98, + "learning_rate": 9.938157347200327e-05, + "loss": 1.0430521, + "memory(GiB)": 71.19, + "step": 1245, + "train_speed(iter/s)": 0.024233 + }, + { + "acc": 0.65242562, + "epoch": 0.98, + "learning_rate": 9.937136475515237e-05, + "loss": 1.16849995, + "memory(GiB)": 71.19, + "step": 1250, + "train_speed(iter/s)": 0.02423 + }, + { + "acc": 0.65185585, + "epoch": 0.98, + "learning_rate": 9.936107299911203e-05, + "loss": 1.17074966, + "memory(GiB)": 71.19, + "step": 1255, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.6592226, + "epoch": 0.99, + "learning_rate": 9.935069822119226e-05, + "loss": 1.11288481, + "memory(GiB)": 71.19, + "step": 1260, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.66193151, + "epoch": 0.99, + "learning_rate": 9.934024043884271e-05, + "loss": 1.16120825, + "memory(GiB)": 71.19, + "step": 1265, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.66481395, + "epoch": 1.0, + "learning_rate": 9.932969966965267e-05, + "loss": 1.09738159, + "memory(GiB)": 71.19, + "step": 1270, + "train_speed(iter/s)": 0.024232 + }, + { + "acc": 0.65508566, + "epoch": 1.0, + "learning_rate": 9.931907593135093e-05, + "loss": 1.14331837, + "memory(GiB)": 71.19, + "step": 1275, + "train_speed(iter/s)": 0.024232 + }, + { + "epoch": 1.0, + "eval_acc": 0.6856999749561733, + "eval_loss": 1.018600344657898, + "eval_runtime": 107.9063, + "eval_samples_per_second": 0.862, + "eval_steps_per_second": 0.862, + "step": 1275 + }, + { + "acc": 0.68564477, + "epoch": 1.0, + "learning_rate": 9.93083692418059e-05, + "loss": 1.04102726, + "memory(GiB)": 71.19, + "step": 1280, + "train_speed(iter/s)": 0.024182 + }, + { + "acc": 0.66535759, + "epoch": 1.01, + "learning_rate": 9.929757961902549e-05, + "loss": 1.10843706, + "memory(GiB)": 71.19, + "step": 1285, + "train_speed(iter/s)": 0.02418 + }, + { + "acc": 0.67624335, + "epoch": 1.01, + "learning_rate": 9.928670708115708e-05, + "loss": 1.03700457, + "memory(GiB)": 71.19, + "step": 1290, + "train_speed(iter/s)": 0.02418 + }, + { + "acc": 0.6848104, + "epoch": 1.01, + "learning_rate": 9.927575164648754e-05, + "loss": 1.01924419, + "memory(GiB)": 71.19, + "step": 1295, + "train_speed(iter/s)": 0.024179 + }, + { + "acc": 0.65978332, + "epoch": 1.02, + "learning_rate": 9.926471333344311e-05, + "loss": 1.10564556, + "memory(GiB)": 71.19, + "step": 1300, + "train_speed(iter/s)": 0.024181 + }, + { + "acc": 0.66694851, + "epoch": 1.02, + "learning_rate": 9.925359216058952e-05, + "loss": 1.06940336, + "memory(GiB)": 71.19, + "step": 1305, + "train_speed(iter/s)": 0.024181 + }, + { + "acc": 0.67258253, + "epoch": 1.03, + "learning_rate": 9.924238814663173e-05, + "loss": 1.05483885, + "memory(GiB)": 71.19, + "step": 1310, + "train_speed(iter/s)": 0.024181 + }, + { + "acc": 0.67171688, + "epoch": 1.03, + "learning_rate": 9.923110131041419e-05, + "loss": 1.0581152, + "memory(GiB)": 71.19, + "step": 1315, + "train_speed(iter/s)": 0.024182 + }, + { + "acc": 0.66035485, + "epoch": 1.03, + "learning_rate": 9.92197316709205e-05, + "loss": 1.10017042, + "memory(GiB)": 71.19, + "step": 1320, + "train_speed(iter/s)": 0.024182 + }, + { + "acc": 0.67783785, + "epoch": 1.04, + "learning_rate": 9.920827924727366e-05, + "loss": 1.05424404, + "memory(GiB)": 71.19, + "step": 1325, + "train_speed(iter/s)": 0.024182 + }, + { + "acc": 0.67227278, + "epoch": 1.04, + "learning_rate": 9.91967440587358e-05, + "loss": 1.07807531, + "memory(GiB)": 71.19, + "step": 1330, + "train_speed(iter/s)": 0.024182 + }, + { + "acc": 0.65477858, + "epoch": 1.05, + "learning_rate": 9.918512612470834e-05, + "loss": 1.10319042, + "memory(GiB)": 71.19, + "step": 1335, + "train_speed(iter/s)": 0.024182 + }, + { + "acc": 0.67027154, + "epoch": 1.05, + "learning_rate": 9.917342546473181e-05, + "loss": 1.07305937, + "memory(GiB)": 71.19, + "step": 1340, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66486812, + "epoch": 1.05, + "learning_rate": 9.916164209848588e-05, + "loss": 1.11207981, + "memory(GiB)": 71.19, + "step": 1345, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66329069, + "epoch": 1.06, + "learning_rate": 9.914977604578941e-05, + "loss": 1.10326147, + "memory(GiB)": 71.19, + "step": 1350, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.68474355, + "epoch": 1.06, + "learning_rate": 9.913782732660024e-05, + "loss": 1.02233744, + "memory(GiB)": 71.19, + "step": 1355, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.67932301, + "epoch": 1.07, + "learning_rate": 9.912579596101525e-05, + "loss": 1.06197271, + "memory(GiB)": 71.19, + "step": 1360, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66768179, + "epoch": 1.07, + "learning_rate": 9.911368196927043e-05, + "loss": 1.07399158, + "memory(GiB)": 71.19, + "step": 1365, + "train_speed(iter/s)": 0.024184 + }, + { + "acc": 0.67560496, + "epoch": 1.07, + "learning_rate": 9.91014853717406e-05, + "loss": 1.05643578, + "memory(GiB)": 71.19, + "step": 1370, + "train_speed(iter/s)": 0.024184 + }, + { + "acc": 0.67212873, + "epoch": 1.08, + "learning_rate": 9.908920618893962e-05, + "loss": 1.0583807, + "memory(GiB)": 71.19, + "step": 1375, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66627192, + "epoch": 1.08, + "learning_rate": 9.90768444415202e-05, + "loss": 1.07695656, + "memory(GiB)": 71.19, + "step": 1380, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66721344, + "epoch": 1.09, + "learning_rate": 9.906440015027399e-05, + "loss": 1.04808693, + "memory(GiB)": 71.19, + "step": 1385, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.67704191, + "epoch": 1.09, + "learning_rate": 9.905187333613134e-05, + "loss": 1.06024771, + "memory(GiB)": 71.19, + "step": 1390, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66961083, + "epoch": 1.09, + "learning_rate": 9.903926402016153e-05, + "loss": 1.07331381, + "memory(GiB)": 71.19, + "step": 1395, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.67959518, + "epoch": 1.1, + "learning_rate": 9.902657222357252e-05, + "loss": 1.04364729, + "memory(GiB)": 71.19, + "step": 1400, + "train_speed(iter/s)": 0.024183 + }, + { + "acc": 0.66278548, + "epoch": 1.1, + "learning_rate": 9.901379796771107e-05, + "loss": 1.08651028, + "memory(GiB)": 71.19, + "step": 1405, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.65881038, + "epoch": 1.11, + "learning_rate": 9.900094127406253e-05, + "loss": 1.09732819, + "memory(GiB)": 71.19, + "step": 1410, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.66367035, + "epoch": 1.11, + "learning_rate": 9.898800216425099e-05, + "loss": 1.083144, + "memory(GiB)": 71.19, + "step": 1415, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.65925193, + "epoch": 1.11, + "learning_rate": 9.897498066003913e-05, + "loss": 1.12018261, + "memory(GiB)": 71.19, + "step": 1420, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.67458215, + "epoch": 1.12, + "learning_rate": 9.89618767833282e-05, + "loss": 1.06256332, + "memory(GiB)": 71.19, + "step": 1425, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.66883864, + "epoch": 1.12, + "learning_rate": 9.894869055615803e-05, + "loss": 1.1081831, + "memory(GiB)": 71.19, + "step": 1430, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.66981688, + "epoch": 1.12, + "learning_rate": 9.893542200070691e-05, + "loss": 1.07718334, + "memory(GiB)": 71.19, + "step": 1435, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.67857156, + "epoch": 1.13, + "learning_rate": 9.892207113929164e-05, + "loss": 1.04210625, + "memory(GiB)": 71.19, + "step": 1440, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.66746936, + "epoch": 1.13, + "learning_rate": 9.890863799436743e-05, + "loss": 1.08562384, + "memory(GiB)": 71.19, + "step": 1445, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.65901647, + "epoch": 1.14, + "learning_rate": 9.889512258852789e-05, + "loss": 1.09942312, + "memory(GiB)": 71.19, + "step": 1450, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.67867947, + "epoch": 1.14, + "learning_rate": 9.888152494450498e-05, + "loss": 1.06383791, + "memory(GiB)": 71.19, + "step": 1455, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.66191411, + "epoch": 1.14, + "learning_rate": 9.886784508516901e-05, + "loss": 1.09565401, + "memory(GiB)": 71.19, + "step": 1460, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.67523494, + "epoch": 1.15, + "learning_rate": 9.885408303352854e-05, + "loss": 1.07071381, + "memory(GiB)": 71.19, + "step": 1465, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.68043942, + "epoch": 1.15, + "learning_rate": 9.884023881273037e-05, + "loss": 1.05898991, + "memory(GiB)": 71.19, + "step": 1470, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.66896992, + "epoch": 1.16, + "learning_rate": 9.882631244605952e-05, + "loss": 1.08150454, + "memory(GiB)": 71.19, + "step": 1475, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.66858001, + "epoch": 1.16, + "learning_rate": 9.881230395693917e-05, + "loss": 1.07318954, + "memory(GiB)": 71.19, + "step": 1480, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.67361059, + "epoch": 1.16, + "learning_rate": 9.879821336893062e-05, + "loss": 1.0633007, + "memory(GiB)": 71.19, + "step": 1485, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.66314216, + "epoch": 1.17, + "learning_rate": 9.878404070573327e-05, + "loss": 1.10530052, + "memory(GiB)": 71.19, + "step": 1490, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.66712093, + "epoch": 1.17, + "learning_rate": 9.876978599118452e-05, + "loss": 1.074893, + "memory(GiB)": 71.19, + "step": 1495, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.67996116, + "epoch": 1.18, + "learning_rate": 9.875544924925981e-05, + "loss": 1.03155136, + "memory(GiB)": 71.19, + "step": 1500, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.66647635, + "epoch": 1.18, + "learning_rate": 9.874103050407256e-05, + "loss": 1.07530098, + "memory(GiB)": 71.19, + "step": 1505, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.66543493, + "epoch": 1.18, + "learning_rate": 9.872652977987408e-05, + "loss": 1.0699563, + "memory(GiB)": 71.19, + "step": 1510, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.66304975, + "epoch": 1.19, + "learning_rate": 9.871194710105357e-05, + "loss": 1.10289001, + "memory(GiB)": 71.19, + "step": 1515, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.67143154, + "epoch": 1.19, + "learning_rate": 9.86972824921381e-05, + "loss": 1.07342091, + "memory(GiB)": 71.19, + "step": 1520, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.66971889, + "epoch": 1.2, + "learning_rate": 9.868253597779249e-05, + "loss": 1.08200026, + "memory(GiB)": 71.19, + "step": 1525, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.66585174, + "epoch": 1.2, + "learning_rate": 9.866770758281937e-05, + "loss": 1.07775135, + "memory(GiB)": 71.19, + "step": 1530, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.67415438, + "epoch": 1.2, + "learning_rate": 9.865279733215905e-05, + "loss": 1.03115015, + "memory(GiB)": 71.19, + "step": 1535, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.6658217, + "epoch": 1.21, + "learning_rate": 9.863780525088955e-05, + "loss": 1.11420527, + "memory(GiB)": 71.19, + "step": 1540, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.68223243, + "epoch": 1.21, + "learning_rate": 9.862273136422648e-05, + "loss": 1.03579702, + "memory(GiB)": 71.19, + "step": 1545, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.68120694, + "epoch": 1.21, + "learning_rate": 9.860757569752309e-05, + "loss": 1.06588163, + "memory(GiB)": 71.19, + "step": 1550, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.65559621, + "epoch": 1.22, + "learning_rate": 9.859233827627013e-05, + "loss": 1.12858868, + "memory(GiB)": 71.19, + "step": 1555, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.67324719, + "epoch": 1.22, + "learning_rate": 9.857701912609589e-05, + "loss": 1.10106726, + "memory(GiB)": 71.19, + "step": 1560, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.68056149, + "epoch": 1.23, + "learning_rate": 9.856161827276613e-05, + "loss": 1.03890066, + "memory(GiB)": 71.19, + "step": 1565, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.66202497, + "epoch": 1.23, + "learning_rate": 9.854613574218396e-05, + "loss": 1.10919619, + "memory(GiB)": 71.19, + "step": 1570, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.66488538, + "epoch": 1.23, + "learning_rate": 9.853057156038998e-05, + "loss": 1.07370605, + "memory(GiB)": 71.19, + "step": 1575, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.66696358, + "epoch": 1.24, + "learning_rate": 9.851492575356201e-05, + "loss": 1.0641902, + "memory(GiB)": 71.19, + "step": 1580, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68181887, + "epoch": 1.24, + "learning_rate": 9.849919834801522e-05, + "loss": 1.04376001, + "memory(GiB)": 71.19, + "step": 1585, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67546906, + "epoch": 1.25, + "learning_rate": 9.8483389370202e-05, + "loss": 1.03430681, + "memory(GiB)": 71.19, + "step": 1590, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.67184181, + "epoch": 1.25, + "learning_rate": 9.846749884671198e-05, + "loss": 1.09638357, + "memory(GiB)": 71.19, + "step": 1595, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.66743326, + "epoch": 1.25, + "learning_rate": 9.845152680427186e-05, + "loss": 1.08324871, + "memory(GiB)": 71.19, + "step": 1600, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67404108, + "epoch": 1.26, + "learning_rate": 9.843547326974555e-05, + "loss": 1.05259237, + "memory(GiB)": 71.19, + "step": 1605, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.67859106, + "epoch": 1.26, + "learning_rate": 9.841933827013394e-05, + "loss": 1.02058706, + "memory(GiB)": 71.19, + "step": 1610, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67385159, + "epoch": 1.27, + "learning_rate": 9.840312183257498e-05, + "loss": 1.0660202, + "memory(GiB)": 71.19, + "step": 1615, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68775396, + "epoch": 1.27, + "learning_rate": 9.83868239843436e-05, + "loss": 1.01889, + "memory(GiB)": 71.19, + "step": 1620, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.67444701, + "epoch": 1.27, + "learning_rate": 9.837044475285165e-05, + "loss": 1.0787632, + "memory(GiB)": 71.19, + "step": 1625, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.67301526, + "epoch": 1.28, + "learning_rate": 9.83539841656478e-05, + "loss": 1.07971487, + "memory(GiB)": 71.19, + "step": 1630, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.6624773, + "epoch": 1.28, + "learning_rate": 9.833744225041767e-05, + "loss": 1.07760115, + "memory(GiB)": 71.19, + "step": 1635, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67510743, + "epoch": 1.29, + "learning_rate": 9.832081903498359e-05, + "loss": 1.06419029, + "memory(GiB)": 71.19, + "step": 1640, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.68025455, + "epoch": 1.29, + "learning_rate": 9.830411454730464e-05, + "loss": 1.03052721, + "memory(GiB)": 71.19, + "step": 1645, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.66855297, + "epoch": 1.29, + "learning_rate": 9.82873288154766e-05, + "loss": 1.06540508, + "memory(GiB)": 71.19, + "step": 1650, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.68221936, + "epoch": 1.3, + "learning_rate": 9.82704618677319e-05, + "loss": 1.02048054, + "memory(GiB)": 71.19, + "step": 1655, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67271628, + "epoch": 1.3, + "learning_rate": 9.825351373243957e-05, + "loss": 1.05244284, + "memory(GiB)": 71.19, + "step": 1660, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.67585492, + "epoch": 1.3, + "learning_rate": 9.82364844381052e-05, + "loss": 1.03952274, + "memory(GiB)": 71.19, + "step": 1665, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67327933, + "epoch": 1.31, + "learning_rate": 9.821937401337087e-05, + "loss": 1.06375408, + "memory(GiB)": 71.19, + "step": 1670, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67473063, + "epoch": 1.31, + "learning_rate": 9.820218248701512e-05, + "loss": 1.03807678, + "memory(GiB)": 71.19, + "step": 1675, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.66647797, + "epoch": 1.32, + "learning_rate": 9.818490988795289e-05, + "loss": 1.08794069, + "memory(GiB)": 71.19, + "step": 1680, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.6725491, + "epoch": 1.32, + "learning_rate": 9.816755624523548e-05, + "loss": 1.04948406, + "memory(GiB)": 71.19, + "step": 1685, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67765465, + "epoch": 1.32, + "learning_rate": 9.815012158805054e-05, + "loss": 1.07902899, + "memory(GiB)": 71.19, + "step": 1690, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68021631, + "epoch": 1.33, + "learning_rate": 9.813260594572192e-05, + "loss": 1.03020191, + "memory(GiB)": 71.19, + "step": 1695, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.66895852, + "epoch": 1.33, + "learning_rate": 9.811500934770969e-05, + "loss": 1.10171871, + "memory(GiB)": 71.19, + "step": 1700, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.65738878, + "epoch": 1.34, + "learning_rate": 9.809733182361014e-05, + "loss": 1.12308722, + "memory(GiB)": 71.19, + "step": 1705, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.68673587, + "epoch": 1.34, + "learning_rate": 9.80795734031556e-05, + "loss": 1.04205341, + "memory(GiB)": 71.19, + "step": 1710, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.66318197, + "epoch": 1.34, + "learning_rate": 9.806173411621451e-05, + "loss": 1.07137575, + "memory(GiB)": 71.19, + "step": 1715, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67586851, + "epoch": 1.35, + "learning_rate": 9.80438139927913e-05, + "loss": 1.04353619, + "memory(GiB)": 71.19, + "step": 1720, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67039175, + "epoch": 1.35, + "learning_rate": 9.802581306302638e-05, + "loss": 1.0834878, + "memory(GiB)": 71.19, + "step": 1725, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67330465, + "epoch": 1.36, + "learning_rate": 9.800773135719604e-05, + "loss": 1.04026947, + "memory(GiB)": 71.19, + "step": 1730, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.65766926, + "epoch": 1.36, + "learning_rate": 9.798956890571244e-05, + "loss": 1.11574898, + "memory(GiB)": 71.19, + "step": 1735, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.66705904, + "epoch": 1.36, + "learning_rate": 9.79713257391236e-05, + "loss": 1.09899406, + "memory(GiB)": 71.19, + "step": 1740, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.66609573, + "epoch": 1.37, + "learning_rate": 9.795300188811319e-05, + "loss": 1.10992451, + "memory(GiB)": 71.19, + "step": 1745, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.6764576, + "epoch": 1.37, + "learning_rate": 9.793459738350069e-05, + "loss": 1.05841427, + "memory(GiB)": 71.19, + "step": 1750, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.65810442, + "epoch": 1.38, + "learning_rate": 9.791611225624118e-05, + "loss": 1.12776289, + "memory(GiB)": 71.19, + "step": 1755, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.6705864, + "epoch": 1.38, + "learning_rate": 9.789754653742537e-05, + "loss": 1.07466335, + "memory(GiB)": 71.19, + "step": 1760, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.67177191, + "epoch": 1.38, + "learning_rate": 9.787890025827948e-05, + "loss": 1.08449554, + "memory(GiB)": 71.19, + "step": 1765, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67626877, + "epoch": 1.39, + "learning_rate": 9.786017345016524e-05, + "loss": 1.06005878, + "memory(GiB)": 71.19, + "step": 1770, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.68222189, + "epoch": 1.39, + "learning_rate": 9.784136614457988e-05, + "loss": 1.06014185, + "memory(GiB)": 71.19, + "step": 1775, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.66160769, + "epoch": 1.4, + "learning_rate": 9.782247837315595e-05, + "loss": 1.11386976, + "memory(GiB)": 71.19, + "step": 1780, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.68455191, + "epoch": 1.4, + "learning_rate": 9.780351016766136e-05, + "loss": 1.04914351, + "memory(GiB)": 71.19, + "step": 1785, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.66824789, + "epoch": 1.4, + "learning_rate": 9.77844615599993e-05, + "loss": 1.05908461, + "memory(GiB)": 71.19, + "step": 1790, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.66619282, + "epoch": 1.41, + "learning_rate": 9.776533258220819e-05, + "loss": 1.09789734, + "memory(GiB)": 71.19, + "step": 1795, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67257457, + "epoch": 1.41, + "learning_rate": 9.774612326646169e-05, + "loss": 1.05887022, + "memory(GiB)": 71.19, + "step": 1800, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67153988, + "epoch": 1.41, + "learning_rate": 9.772683364506847e-05, + "loss": 1.07383604, + "memory(GiB)": 71.19, + "step": 1805, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.67205477, + "epoch": 1.42, + "learning_rate": 9.770746375047235e-05, + "loss": 1.06401491, + "memory(GiB)": 71.19, + "step": 1810, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.66954546, + "epoch": 1.42, + "learning_rate": 9.768801361525215e-05, + "loss": 1.08306198, + "memory(GiB)": 71.19, + "step": 1815, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68025694, + "epoch": 1.43, + "learning_rate": 9.766848327212161e-05, + "loss": 1.02885761, + "memory(GiB)": 71.19, + "step": 1820, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68164105, + "epoch": 1.43, + "learning_rate": 9.764887275392943e-05, + "loss": 1.05446472, + "memory(GiB)": 71.19, + "step": 1825, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.66762023, + "epoch": 1.43, + "learning_rate": 9.762918209365913e-05, + "loss": 1.10211878, + "memory(GiB)": 71.19, + "step": 1830, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67623882, + "epoch": 1.44, + "learning_rate": 9.760941132442902e-05, + "loss": 1.06018152, + "memory(GiB)": 71.19, + "step": 1835, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.6666759, + "epoch": 1.44, + "learning_rate": 9.758956047949215e-05, + "loss": 1.11666231, + "memory(GiB)": 71.19, + "step": 1840, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67013221, + "epoch": 1.45, + "learning_rate": 9.756962959223628e-05, + "loss": 1.06344328, + "memory(GiB)": 71.19, + "step": 1845, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.66876597, + "epoch": 1.45, + "learning_rate": 9.754961869618373e-05, + "loss": 1.08094902, + "memory(GiB)": 71.19, + "step": 1850, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.66471753, + "epoch": 1.45, + "learning_rate": 9.752952782499147e-05, + "loss": 1.0886961, + "memory(GiB)": 71.19, + "step": 1855, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.67831697, + "epoch": 1.46, + "learning_rate": 9.750935701245092e-05, + "loss": 1.03748837, + "memory(GiB)": 71.19, + "step": 1860, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.6669693, + "epoch": 1.46, + "learning_rate": 9.748910629248798e-05, + "loss": 1.08023806, + "memory(GiB)": 71.19, + "step": 1865, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.67126279, + "epoch": 1.47, + "learning_rate": 9.746877569916297e-05, + "loss": 1.07919216, + "memory(GiB)": 71.19, + "step": 1870, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67684112, + "epoch": 1.47, + "learning_rate": 9.74483652666705e-05, + "loss": 1.04165659, + "memory(GiB)": 71.19, + "step": 1875, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.68273034, + "epoch": 1.47, + "learning_rate": 9.74278750293395e-05, + "loss": 1.04425545, + "memory(GiB)": 71.19, + "step": 1880, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.66332841, + "epoch": 1.48, + "learning_rate": 9.740730502163311e-05, + "loss": 1.07659769, + "memory(GiB)": 71.19, + "step": 1885, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68010182, + "epoch": 1.48, + "learning_rate": 9.738665527814867e-05, + "loss": 1.0395319, + "memory(GiB)": 71.19, + "step": 1890, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68821034, + "epoch": 1.49, + "learning_rate": 9.736592583361762e-05, + "loss": 1.01133175, + "memory(GiB)": 71.19, + "step": 1895, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.66931405, + "epoch": 1.49, + "learning_rate": 9.73451167229054e-05, + "loss": 1.06255102, + "memory(GiB)": 71.19, + "step": 1900, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67967229, + "epoch": 1.49, + "learning_rate": 9.732422798101149e-05, + "loss": 1.02841129, + "memory(GiB)": 71.19, + "step": 1905, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67584767, + "epoch": 1.5, + "learning_rate": 9.73032596430693e-05, + "loss": 1.05436392, + "memory(GiB)": 71.19, + "step": 1910, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.66209011, + "epoch": 1.5, + "learning_rate": 9.728221174434613e-05, + "loss": 1.10967607, + "memory(GiB)": 71.19, + "step": 1915, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68050909, + "epoch": 1.5, + "learning_rate": 9.726108432024306e-05, + "loss": 1.02168255, + "memory(GiB)": 71.19, + "step": 1920, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68102293, + "epoch": 1.51, + "learning_rate": 9.723987740629494e-05, + "loss": 1.05781345, + "memory(GiB)": 71.19, + "step": 1925, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.6717145, + "epoch": 1.51, + "learning_rate": 9.721859103817033e-05, + "loss": 1.06613684, + "memory(GiB)": 71.19, + "step": 1930, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.66321344, + "epoch": 1.52, + "learning_rate": 9.719722525167141e-05, + "loss": 1.09566288, + "memory(GiB)": 71.19, + "step": 1935, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.6747292, + "epoch": 1.52, + "learning_rate": 9.717578008273393e-05, + "loss": 1.04526234, + "memory(GiB)": 71.19, + "step": 1940, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68077927, + "epoch": 1.52, + "learning_rate": 9.715425556742716e-05, + "loss": 1.02439919, + "memory(GiB)": 71.19, + "step": 1945, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.67475858, + "epoch": 1.53, + "learning_rate": 9.713265174195388e-05, + "loss": 1.05720959, + "memory(GiB)": 71.19, + "step": 1950, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.66691504, + "epoch": 1.53, + "learning_rate": 9.711096864265016e-05, + "loss": 1.08331633, + "memory(GiB)": 71.19, + "step": 1955, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.66824608, + "epoch": 1.54, + "learning_rate": 9.708920630598551e-05, + "loss": 1.08959455, + "memory(GiB)": 71.19, + "step": 1960, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.66691604, + "epoch": 1.54, + "learning_rate": 9.706736476856264e-05, + "loss": 1.05755281, + "memory(GiB)": 71.19, + "step": 1965, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.66844997, + "epoch": 1.54, + "learning_rate": 9.704544406711746e-05, + "loss": 1.07190256, + "memory(GiB)": 71.19, + "step": 1970, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.6775053, + "epoch": 1.55, + "learning_rate": 9.702344423851911e-05, + "loss": 1.04533443, + "memory(GiB)": 71.19, + "step": 1975, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.66825333, + "epoch": 1.55, + "learning_rate": 9.700136531976974e-05, + "loss": 1.1034709, + "memory(GiB)": 71.19, + "step": 1980, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.67021503, + "epoch": 1.56, + "learning_rate": 9.697920734800456e-05, + "loss": 1.08550053, + "memory(GiB)": 71.19, + "step": 1985, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.66171827, + "epoch": 1.56, + "learning_rate": 9.695697036049172e-05, + "loss": 1.09154787, + "memory(GiB)": 71.19, + "step": 1990, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68564267, + "epoch": 1.56, + "learning_rate": 9.693465439463228e-05, + "loss": 1.02461205, + "memory(GiB)": 71.19, + "step": 1995, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.67194061, + "epoch": 1.57, + "learning_rate": 9.691225948796014e-05, + "loss": 1.05746717, + "memory(GiB)": 71.19, + "step": 2000, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.67832704, + "epoch": 1.57, + "learning_rate": 9.6889785678142e-05, + "loss": 1.05458746, + "memory(GiB)": 71.19, + "step": 2005, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.67701178, + "epoch": 1.58, + "learning_rate": 9.68672330029772e-05, + "loss": 1.05478268, + "memory(GiB)": 71.19, + "step": 2010, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.66319699, + "epoch": 1.58, + "learning_rate": 9.684460150039779e-05, + "loss": 1.1452549, + "memory(GiB)": 71.19, + "step": 2015, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.6763217, + "epoch": 1.58, + "learning_rate": 9.682189120846834e-05, + "loss": 1.04298344, + "memory(GiB)": 71.19, + "step": 2020, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.68313031, + "epoch": 1.59, + "learning_rate": 9.679910216538601e-05, + "loss": 1.02893553, + "memory(GiB)": 71.19, + "step": 2025, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67663183, + "epoch": 1.59, + "learning_rate": 9.677623440948038e-05, + "loss": 1.04474058, + "memory(GiB)": 71.19, + "step": 2030, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.66433477, + "epoch": 1.59, + "learning_rate": 9.675328797921342e-05, + "loss": 1.07190542, + "memory(GiB)": 71.19, + "step": 2035, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.68374462, + "epoch": 1.6, + "learning_rate": 9.67302629131794e-05, + "loss": 1.07545433, + "memory(GiB)": 71.19, + "step": 2040, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.67807226, + "epoch": 1.6, + "learning_rate": 9.670715925010489e-05, + "loss": 1.05842772, + "memory(GiB)": 71.19, + "step": 2045, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.67401872, + "epoch": 1.61, + "learning_rate": 9.668397702884866e-05, + "loss": 1.07067871, + "memory(GiB)": 71.19, + "step": 2050, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.67526712, + "epoch": 1.61, + "learning_rate": 9.666071628840154e-05, + "loss": 1.04234457, + "memory(GiB)": 71.19, + "step": 2055, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.67841988, + "epoch": 1.61, + "learning_rate": 9.663737706788652e-05, + "loss": 1.05335855, + "memory(GiB)": 71.19, + "step": 2060, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.67320924, + "epoch": 1.62, + "learning_rate": 9.66139594065585e-05, + "loss": 1.07291307, + "memory(GiB)": 71.19, + "step": 2065, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.67182488, + "epoch": 1.62, + "learning_rate": 9.659046334380439e-05, + "loss": 1.06681614, + "memory(GiB)": 71.19, + "step": 2070, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.68178086, + "epoch": 1.63, + "learning_rate": 9.65668889191429e-05, + "loss": 1.02115288, + "memory(GiB)": 71.19, + "step": 2075, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.67527838, + "epoch": 1.63, + "learning_rate": 9.654323617222456e-05, + "loss": 1.02676697, + "memory(GiB)": 71.19, + "step": 2080, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.68160815, + "epoch": 1.63, + "learning_rate": 9.651950514283166e-05, + "loss": 1.0352355, + "memory(GiB)": 71.19, + "step": 2085, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.67300172, + "epoch": 1.64, + "learning_rate": 9.649569587087814e-05, + "loss": 1.06639357, + "memory(GiB)": 71.19, + "step": 2090, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67614121, + "epoch": 1.64, + "learning_rate": 9.647180839640951e-05, + "loss": 1.05420818, + "memory(GiB)": 71.19, + "step": 2095, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.67984071, + "epoch": 1.65, + "learning_rate": 9.644784275960286e-05, + "loss": 1.04375839, + "memory(GiB)": 71.19, + "step": 2100, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.67311459, + "epoch": 1.65, + "learning_rate": 9.64237990007667e-05, + "loss": 1.05994186, + "memory(GiB)": 71.19, + "step": 2105, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.66600237, + "epoch": 1.65, + "learning_rate": 9.639967716034095e-05, + "loss": 1.08946924, + "memory(GiB)": 71.19, + "step": 2110, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.68244076, + "epoch": 1.66, + "learning_rate": 9.637547727889688e-05, + "loss": 0.99340763, + "memory(GiB)": 71.19, + "step": 2115, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.6899344, + "epoch": 1.66, + "learning_rate": 9.6351199397137e-05, + "loss": 1.00486383, + "memory(GiB)": 71.19, + "step": 2120, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.68560667, + "epoch": 1.67, + "learning_rate": 9.632684355589499e-05, + "loss": 1.02728119, + "memory(GiB)": 71.19, + "step": 2125, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.67438531, + "epoch": 1.67, + "learning_rate": 9.630240979613569e-05, + "loss": 1.08428488, + "memory(GiB)": 71.19, + "step": 2130, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.67004066, + "epoch": 1.67, + "learning_rate": 9.627789815895498e-05, + "loss": 1.06729488, + "memory(GiB)": 71.19, + "step": 2135, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.66677608, + "epoch": 1.68, + "learning_rate": 9.625330868557973e-05, + "loss": 1.08347321, + "memory(GiB)": 71.19, + "step": 2140, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.68743095, + "epoch": 1.68, + "learning_rate": 9.622864141736772e-05, + "loss": 1.02547398, + "memory(GiB)": 71.19, + "step": 2145, + "train_speed(iter/s)": 0.024205 + }, + { + "acc": 0.66881833, + "epoch": 1.69, + "learning_rate": 9.620389639580753e-05, + "loss": 1.10209799, + "memory(GiB)": 71.19, + "step": 2150, + "train_speed(iter/s)": 0.024206 + }, + { + "acc": 0.68243523, + "epoch": 1.69, + "learning_rate": 9.617907366251862e-05, + "loss": 1.02223969, + "memory(GiB)": 71.19, + "step": 2155, + "train_speed(iter/s)": 0.024206 + }, + { + "acc": 0.67003131, + "epoch": 1.69, + "learning_rate": 9.615417325925106e-05, + "loss": 1.05287113, + "memory(GiB)": 71.19, + "step": 2160, + "train_speed(iter/s)": 0.024205 + }, + { + "acc": 0.67350674, + "epoch": 1.7, + "learning_rate": 9.612919522788559e-05, + "loss": 1.05360003, + "memory(GiB)": 71.19, + "step": 2165, + "train_speed(iter/s)": 0.024205 + }, + { + "acc": 0.67382431, + "epoch": 1.7, + "learning_rate": 9.610413961043354e-05, + "loss": 1.06978779, + "memory(GiB)": 71.19, + "step": 2170, + "train_speed(iter/s)": 0.024206 + }, + { + "acc": 0.69041929, + "epoch": 1.7, + "learning_rate": 9.607900644903667e-05, + "loss": 0.99977674, + "memory(GiB)": 71.19, + "step": 2175, + "train_speed(iter/s)": 0.024205 + }, + { + "acc": 0.66950564, + "epoch": 1.71, + "learning_rate": 9.605379578596724e-05, + "loss": 1.07388229, + "memory(GiB)": 71.19, + "step": 2180, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.67316561, + "epoch": 1.71, + "learning_rate": 9.60285076636278e-05, + "loss": 1.06690502, + "memory(GiB)": 71.19, + "step": 2185, + "train_speed(iter/s)": 0.024205 + }, + { + "acc": 0.68053436, + "epoch": 1.72, + "learning_rate": 9.600314212455123e-05, + "loss": 1.02924366, + "memory(GiB)": 71.19, + "step": 2190, + "train_speed(iter/s)": 0.024205 + }, + { + "acc": 0.68438997, + "epoch": 1.72, + "learning_rate": 9.597769921140059e-05, + "loss": 1.04086161, + "memory(GiB)": 71.19, + "step": 2195, + "train_speed(iter/s)": 0.024206 + }, + { + "acc": 0.68536568, + "epoch": 1.72, + "learning_rate": 9.595217896696906e-05, + "loss": 1.02934484, + "memory(GiB)": 71.19, + "step": 2200, + "train_speed(iter/s)": 0.024206 + }, + { + "acc": 0.66752796, + "epoch": 1.73, + "learning_rate": 9.592658143417993e-05, + "loss": 1.07700911, + "memory(GiB)": 71.19, + "step": 2205, + "train_speed(iter/s)": 0.024206 + }, + { + "acc": 0.6667316, + "epoch": 1.73, + "learning_rate": 9.590090665608647e-05, + "loss": 1.10646029, + "memory(GiB)": 71.19, + "step": 2210, + "train_speed(iter/s)": 0.024207 + }, + { + "acc": 0.67603645, + "epoch": 1.74, + "learning_rate": 9.587515467587184e-05, + "loss": 1.01645374, + "memory(GiB)": 71.19, + "step": 2215, + "train_speed(iter/s)": 0.024207 + }, + { + "acc": 0.67964444, + "epoch": 1.74, + "learning_rate": 9.584932553684912e-05, + "loss": 1.02113829, + "memory(GiB)": 71.19, + "step": 2220, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67175584, + "epoch": 1.74, + "learning_rate": 9.582341928246105e-05, + "loss": 1.07317562, + "memory(GiB)": 71.19, + "step": 2225, + "train_speed(iter/s)": 0.024207 + }, + { + "acc": 0.67587581, + "epoch": 1.75, + "learning_rate": 9.57974359562802e-05, + "loss": 1.05869913, + "memory(GiB)": 71.19, + "step": 2230, + "train_speed(iter/s)": 0.024207 + }, + { + "acc": 0.67292519, + "epoch": 1.75, + "learning_rate": 9.577137560200868e-05, + "loss": 1.05732136, + "memory(GiB)": 71.19, + "step": 2235, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67593732, + "epoch": 1.76, + "learning_rate": 9.574523826347821e-05, + "loss": 1.05148783, + "memory(GiB)": 71.19, + "step": 2240, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66758423, + "epoch": 1.76, + "learning_rate": 9.571902398464996e-05, + "loss": 1.08099422, + "memory(GiB)": 71.19, + "step": 2245, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67744985, + "epoch": 1.76, + "learning_rate": 9.569273280961451e-05, + "loss": 1.0564353, + "memory(GiB)": 71.19, + "step": 2250, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.68401761, + "epoch": 1.77, + "learning_rate": 9.566636478259178e-05, + "loss": 1.03796558, + "memory(GiB)": 71.19, + "step": 2255, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66767297, + "epoch": 1.77, + "learning_rate": 9.563991994793095e-05, + "loss": 1.07227659, + "memory(GiB)": 71.19, + "step": 2260, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.66152043, + "epoch": 1.78, + "learning_rate": 9.56133983501104e-05, + "loss": 1.12642975, + "memory(GiB)": 71.19, + "step": 2265, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.68994551, + "epoch": 1.78, + "learning_rate": 9.558680003373761e-05, + "loss": 1.02917881, + "memory(GiB)": 71.19, + "step": 2270, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66738191, + "epoch": 1.78, + "learning_rate": 9.556012504354907e-05, + "loss": 1.0579689, + "memory(GiB)": 71.19, + "step": 2275, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.68090143, + "epoch": 1.79, + "learning_rate": 9.553337342441026e-05, + "loss": 1.02114449, + "memory(GiB)": 71.19, + "step": 2280, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.68244548, + "epoch": 1.79, + "learning_rate": 9.550654522131554e-05, + "loss": 1.05710659, + "memory(GiB)": 71.19, + "step": 2285, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.67936034, + "epoch": 1.79, + "learning_rate": 9.547964047938807e-05, + "loss": 1.06786947, + "memory(GiB)": 71.19, + "step": 2290, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.66966805, + "epoch": 1.8, + "learning_rate": 9.545265924387976e-05, + "loss": 1.07189226, + "memory(GiB)": 71.19, + "step": 2295, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66073766, + "epoch": 1.8, + "learning_rate": 9.542560156017114e-05, + "loss": 1.10265789, + "memory(GiB)": 71.19, + "step": 2300, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.6612524, + "epoch": 1.81, + "learning_rate": 9.539846747377137e-05, + "loss": 1.10375538, + "memory(GiB)": 71.19, + "step": 2305, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.6832036, + "epoch": 1.81, + "learning_rate": 9.537125703031808e-05, + "loss": 1.01659298, + "memory(GiB)": 71.19, + "step": 2310, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67649841, + "epoch": 1.81, + "learning_rate": 9.534397027557734e-05, + "loss": 1.05449762, + "memory(GiB)": 71.19, + "step": 2315, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.6723896, + "epoch": 1.82, + "learning_rate": 9.531660725544357e-05, + "loss": 1.08471155, + "memory(GiB)": 71.19, + "step": 2320, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67170587, + "epoch": 1.82, + "learning_rate": 9.528916801593945e-05, + "loss": 1.06638412, + "memory(GiB)": 71.19, + "step": 2325, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66721401, + "epoch": 1.83, + "learning_rate": 9.526165260321587e-05, + "loss": 1.07490387, + "memory(GiB)": 71.19, + "step": 2330, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.66922231, + "epoch": 1.83, + "learning_rate": 9.523406106355181e-05, + "loss": 1.07525682, + "memory(GiB)": 71.19, + "step": 2335, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.68418026, + "epoch": 1.83, + "learning_rate": 9.520639344335436e-05, + "loss": 1.02238379, + "memory(GiB)": 71.19, + "step": 2340, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66266646, + "epoch": 1.84, + "learning_rate": 9.517864978915852e-05, + "loss": 1.107903, + "memory(GiB)": 71.19, + "step": 2345, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66747322, + "epoch": 1.84, + "learning_rate": 9.515083014762714e-05, + "loss": 1.05063229, + "memory(GiB)": 71.19, + "step": 2350, + "train_speed(iter/s)": 0.024207 + }, + { + "acc": 0.68329477, + "epoch": 1.85, + "learning_rate": 9.512293456555094e-05, + "loss": 1.04089613, + "memory(GiB)": 71.19, + "step": 2355, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67805209, + "epoch": 1.85, + "learning_rate": 9.509496308984834e-05, + "loss": 1.05391035, + "memory(GiB)": 71.19, + "step": 2360, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.68161922, + "epoch": 1.85, + "learning_rate": 9.506691576756542e-05, + "loss": 1.0376071, + "memory(GiB)": 71.19, + "step": 2365, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.68472705, + "epoch": 1.86, + "learning_rate": 9.50387926458758e-05, + "loss": 1.03176441, + "memory(GiB)": 71.19, + "step": 2370, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67763271, + "epoch": 1.86, + "learning_rate": 9.501059377208062e-05, + "loss": 1.02066383, + "memory(GiB)": 71.19, + "step": 2375, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67828851, + "epoch": 1.87, + "learning_rate": 9.49823191936084e-05, + "loss": 1.03181, + "memory(GiB)": 71.19, + "step": 2380, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.68185763, + "epoch": 1.87, + "learning_rate": 9.495396895801504e-05, + "loss": 1.01452837, + "memory(GiB)": 71.19, + "step": 2385, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.69047713, + "epoch": 1.87, + "learning_rate": 9.492554311298363e-05, + "loss": 1.03052311, + "memory(GiB)": 71.19, + "step": 2390, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67350526, + "epoch": 1.88, + "learning_rate": 9.489704170632448e-05, + "loss": 1.05733795, + "memory(GiB)": 71.19, + "step": 2395, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66548829, + "epoch": 1.88, + "learning_rate": 9.486846478597493e-05, + "loss": 1.10433922, + "memory(GiB)": 71.19, + "step": 2400, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.67878256, + "epoch": 1.88, + "learning_rate": 9.48398123999994e-05, + "loss": 1.04339581, + "memory(GiB)": 71.19, + "step": 2405, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.66891594, + "epoch": 1.89, + "learning_rate": 9.481108459658918e-05, + "loss": 1.06247549, + "memory(GiB)": 71.19, + "step": 2410, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.67175364, + "epoch": 1.89, + "learning_rate": 9.478228142406245e-05, + "loss": 1.08076143, + "memory(GiB)": 71.19, + "step": 2415, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66775222, + "epoch": 1.9, + "learning_rate": 9.475340293086414e-05, + "loss": 1.07804575, + "memory(GiB)": 71.19, + "step": 2420, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.67420983, + "epoch": 1.9, + "learning_rate": 9.472444916556582e-05, + "loss": 1.06133413, + "memory(GiB)": 71.19, + "step": 2425, + "train_speed(iter/s)": 0.024208 + }, + { + "acc": 0.66697445, + "epoch": 1.9, + "learning_rate": 9.469542017686574e-05, + "loss": 1.08241444, + "memory(GiB)": 71.19, + "step": 2430, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.65952163, + "epoch": 1.91, + "learning_rate": 9.466631601358865e-05, + "loss": 1.12456112, + "memory(GiB)": 71.19, + "step": 2435, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.68122878, + "epoch": 1.91, + "learning_rate": 9.463713672468566e-05, + "loss": 1.02802782, + "memory(GiB)": 71.19, + "step": 2440, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.67782488, + "epoch": 1.92, + "learning_rate": 9.460788235923434e-05, + "loss": 1.04834585, + "memory(GiB)": 71.19, + "step": 2445, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.6813798, + "epoch": 1.92, + "learning_rate": 9.457855296643847e-05, + "loss": 1.03965998, + "memory(GiB)": 71.19, + "step": 2450, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.6787158, + "epoch": 1.92, + "learning_rate": 9.454914859562806e-05, + "loss": 1.03802996, + "memory(GiB)": 71.19, + "step": 2455, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.67819457, + "epoch": 1.93, + "learning_rate": 9.451966929625921e-05, + "loss": 1.04884434, + "memory(GiB)": 71.19, + "step": 2460, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.68044524, + "epoch": 1.93, + "learning_rate": 9.449011511791403e-05, + "loss": 1.00336819, + "memory(GiB)": 71.19, + "step": 2465, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.68315306, + "epoch": 1.94, + "learning_rate": 9.44604861103006e-05, + "loss": 1.0400569, + "memory(GiB)": 71.19, + "step": 2470, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.67919292, + "epoch": 1.94, + "learning_rate": 9.443078232325283e-05, + "loss": 1.0457303, + "memory(GiB)": 71.19, + "step": 2475, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.67261739, + "epoch": 1.94, + "learning_rate": 9.440100380673041e-05, + "loss": 1.08123322, + "memory(GiB)": 71.19, + "step": 2480, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.67910471, + "epoch": 1.95, + "learning_rate": 9.437115061081873e-05, + "loss": 1.06696892, + "memory(GiB)": 71.19, + "step": 2485, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.68002834, + "epoch": 1.95, + "learning_rate": 9.434122278572881e-05, + "loss": 1.01719761, + "memory(GiB)": 71.19, + "step": 2490, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.68830619, + "epoch": 1.96, + "learning_rate": 9.431122038179713e-05, + "loss": 1.02319183, + "memory(GiB)": 71.19, + "step": 2495, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.68742871, + "epoch": 1.96, + "learning_rate": 9.428114344948566e-05, + "loss": 1.01065245, + "memory(GiB)": 71.19, + "step": 2500, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.67193766, + "epoch": 1.96, + "learning_rate": 9.425099203938169e-05, + "loss": 1.062183, + "memory(GiB)": 71.19, + "step": 2505, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.66727209, + "epoch": 1.97, + "learning_rate": 9.422076620219777e-05, + "loss": 1.06330957, + "memory(GiB)": 71.19, + "step": 2510, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.68619056, + "epoch": 1.97, + "learning_rate": 9.419046598877169e-05, + "loss": 1.02488794, + "memory(GiB)": 71.19, + "step": 2515, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.67550688, + "epoch": 1.98, + "learning_rate": 9.416009145006626e-05, + "loss": 1.0441782, + "memory(GiB)": 71.19, + "step": 2520, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.69182601, + "epoch": 1.98, + "learning_rate": 9.412964263716934e-05, + "loss": 0.99197636, + "memory(GiB)": 71.19, + "step": 2525, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.67208972, + "epoch": 1.98, + "learning_rate": 9.409911960129373e-05, + "loss": 1.04292965, + "memory(GiB)": 71.19, + "step": 2530, + "train_speed(iter/s)": 0.024209 + }, + { + "acc": 0.69247909, + "epoch": 1.99, + "learning_rate": 9.406852239377702e-05, + "loss": 1.03010435, + "memory(GiB)": 71.19, + "step": 2535, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.67900863, + "epoch": 1.99, + "learning_rate": 9.403785106608159e-05, + "loss": 1.02731123, + "memory(GiB)": 71.19, + "step": 2540, + "train_speed(iter/s)": 0.024211 + }, + { + "acc": 0.68802767, + "epoch": 1.99, + "learning_rate": 9.400710566979446e-05, + "loss": 1.02137775, + "memory(GiB)": 71.19, + "step": 2545, + "train_speed(iter/s)": 0.02421 + }, + { + "acc": 0.68384156, + "epoch": 2.0, + "learning_rate": 9.397628625662724e-05, + "loss": 1.00198641, + "memory(GiB)": 71.19, + "step": 2550, + "train_speed(iter/s)": 0.02421 + }, + { + "epoch": 2.0, + "eval_acc": 0.7063611319809667, + "eval_loss": 0.9125774502754211, + "eval_runtime": 107.4016, + "eval_samples_per_second": 0.866, + "eval_steps_per_second": 0.866, + "step": 2551 + }, + { + "acc": 0.69561715, + "epoch": 2.0, + "learning_rate": 9.394539287841606e-05, + "loss": 0.96205349, + "memory(GiB)": 71.19, + "step": 2555, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.70156784, + "epoch": 2.01, + "learning_rate": 9.391442558712141e-05, + "loss": 0.95558033, + "memory(GiB)": 71.19, + "step": 2560, + "train_speed(iter/s)": 0.024184 + }, + { + "acc": 0.70194588, + "epoch": 2.01, + "learning_rate": 9.38833844348281e-05, + "loss": 0.94522266, + "memory(GiB)": 71.19, + "step": 2565, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.69583673, + "epoch": 2.01, + "learning_rate": 9.385226947374519e-05, + "loss": 0.95971365, + "memory(GiB)": 71.19, + "step": 2570, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.69883165, + "epoch": 2.02, + "learning_rate": 9.382108075620588e-05, + "loss": 0.95136395, + "memory(GiB)": 71.19, + "step": 2575, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.70211344, + "epoch": 2.02, + "learning_rate": 9.37898183346674e-05, + "loss": 0.95915556, + "memory(GiB)": 71.19, + "step": 2580, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.6914197, + "epoch": 2.03, + "learning_rate": 9.375848226171097e-05, + "loss": 0.97404881, + "memory(GiB)": 71.19, + "step": 2585, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.69058862, + "epoch": 2.03, + "learning_rate": 9.372707259004168e-05, + "loss": 0.97601357, + "memory(GiB)": 71.19, + "step": 2590, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.69468822, + "epoch": 2.03, + "learning_rate": 9.369558937248841e-05, + "loss": 0.94959822, + "memory(GiB)": 71.19, + "step": 2595, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.69152589, + "epoch": 2.04, + "learning_rate": 9.366403266200372e-05, + "loss": 0.98147879, + "memory(GiB)": 71.19, + "step": 2600, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.69578485, + "epoch": 2.04, + "learning_rate": 9.363240251166381e-05, + "loss": 0.96815405, + "memory(GiB)": 71.19, + "step": 2605, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.70924282, + "epoch": 2.05, + "learning_rate": 9.360069897466837e-05, + "loss": 0.92589321, + "memory(GiB)": 71.19, + "step": 2610, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.69980206, + "epoch": 2.05, + "learning_rate": 9.356892210434054e-05, + "loss": 0.93600454, + "memory(GiB)": 71.19, + "step": 2615, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.68358474, + "epoch": 2.05, + "learning_rate": 9.353707195412682e-05, + "loss": 0.98476477, + "memory(GiB)": 71.19, + "step": 2620, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.70984707, + "epoch": 2.06, + "learning_rate": 9.35051485775969e-05, + "loss": 0.92595644, + "memory(GiB)": 71.19, + "step": 2625, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.70100389, + "epoch": 2.06, + "learning_rate": 9.34731520284437e-05, + "loss": 0.97320604, + "memory(GiB)": 71.19, + "step": 2630, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.69202828, + "epoch": 2.07, + "learning_rate": 9.344108236048317e-05, + "loss": 0.9583993, + "memory(GiB)": 71.19, + "step": 2635, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.69916315, + "epoch": 2.07, + "learning_rate": 9.340893962765427e-05, + "loss": 0.97032738, + "memory(GiB)": 71.19, + "step": 2640, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.69505591, + "epoch": 2.07, + "learning_rate": 9.337672388401882e-05, + "loss": 0.9470933, + "memory(GiB)": 71.19, + "step": 2645, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.69918957, + "epoch": 2.08, + "learning_rate": 9.334443518376144e-05, + "loss": 0.94485273, + "memory(GiB)": 71.19, + "step": 2650, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.69890566, + "epoch": 2.08, + "learning_rate": 9.33120735811895e-05, + "loss": 0.95022421, + "memory(GiB)": 71.19, + "step": 2655, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.70103612, + "epoch": 2.08, + "learning_rate": 9.327963913073292e-05, + "loss": 0.93258381, + "memory(GiB)": 71.19, + "step": 2660, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.68382578, + "epoch": 2.09, + "learning_rate": 9.32471318869442e-05, + "loss": 0.98318644, + "memory(GiB)": 71.19, + "step": 2665, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.69849315, + "epoch": 2.09, + "learning_rate": 9.321455190449826e-05, + "loss": 0.95109901, + "memory(GiB)": 71.19, + "step": 2670, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.69872427, + "epoch": 2.1, + "learning_rate": 9.318189923819237e-05, + "loss": 0.95068798, + "memory(GiB)": 71.19, + "step": 2675, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.6956666, + "epoch": 2.1, + "learning_rate": 9.314917394294601e-05, + "loss": 0.94351721, + "memory(GiB)": 71.19, + "step": 2680, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.68996038, + "epoch": 2.1, + "learning_rate": 9.311637607380087e-05, + "loss": 0.98688478, + "memory(GiB)": 71.19, + "step": 2685, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.69613433, + "epoch": 2.11, + "learning_rate": 9.308350568592062e-05, + "loss": 0.96690464, + "memory(GiB)": 71.19, + "step": 2690, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.70577445, + "epoch": 2.11, + "learning_rate": 9.305056283459101e-05, + "loss": 0.94153576, + "memory(GiB)": 71.19, + "step": 2695, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.69807467, + "epoch": 2.12, + "learning_rate": 9.30175475752196e-05, + "loss": 0.95756607, + "memory(GiB)": 71.19, + "step": 2700, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.69821429, + "epoch": 2.12, + "learning_rate": 9.298445996333576e-05, + "loss": 0.94874229, + "memory(GiB)": 71.19, + "step": 2705, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.70757422, + "epoch": 2.12, + "learning_rate": 9.295130005459053e-05, + "loss": 0.92731676, + "memory(GiB)": 71.19, + "step": 2710, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.70590787, + "epoch": 2.13, + "learning_rate": 9.291806790475659e-05, + "loss": 0.91320696, + "memory(GiB)": 71.19, + "step": 2715, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.69287601, + "epoch": 2.13, + "learning_rate": 9.288476356972806e-05, + "loss": 0.96277952, + "memory(GiB)": 71.19, + "step": 2720, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.69249158, + "epoch": 2.14, + "learning_rate": 9.285138710552053e-05, + "loss": 0.9557168, + "memory(GiB)": 71.19, + "step": 2725, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.70517898, + "epoch": 2.14, + "learning_rate": 9.281793856827086e-05, + "loss": 0.95910587, + "memory(GiB)": 71.19, + "step": 2730, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.68601875, + "epoch": 2.14, + "learning_rate": 9.278441801423718e-05, + "loss": 0.98039818, + "memory(GiB)": 71.19, + "step": 2735, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.69862165, + "epoch": 2.15, + "learning_rate": 9.275082549979872e-05, + "loss": 0.94497709, + "memory(GiB)": 71.19, + "step": 2740, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.6958168, + "epoch": 2.15, + "learning_rate": 9.271716108145574e-05, + "loss": 0.95876045, + "memory(GiB)": 71.19, + "step": 2745, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.69509659, + "epoch": 2.16, + "learning_rate": 9.268342481582944e-05, + "loss": 0.98702393, + "memory(GiB)": 71.19, + "step": 2750, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.69033999, + "epoch": 2.16, + "learning_rate": 9.264961675966186e-05, + "loss": 0.991891, + "memory(GiB)": 71.19, + "step": 2755, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.70156898, + "epoch": 2.16, + "learning_rate": 9.261573696981579e-05, + "loss": 0.93319263, + "memory(GiB)": 71.19, + "step": 2760, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70409079, + "epoch": 2.17, + "learning_rate": 9.258178550327468e-05, + "loss": 0.94536772, + "memory(GiB)": 71.19, + "step": 2765, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68395271, + "epoch": 2.17, + "learning_rate": 9.254776241714251e-05, + "loss": 0.99986162, + "memory(GiB)": 71.19, + "step": 2770, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68076348, + "epoch": 2.17, + "learning_rate": 9.251366776864377e-05, + "loss": 1.00616322, + "memory(GiB)": 71.19, + "step": 2775, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68825445, + "epoch": 2.18, + "learning_rate": 9.247950161512324e-05, + "loss": 0.97691736, + "memory(GiB)": 71.19, + "step": 2780, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.70971127, + "epoch": 2.18, + "learning_rate": 9.244526401404604e-05, + "loss": 0.92741508, + "memory(GiB)": 71.19, + "step": 2785, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69368434, + "epoch": 2.19, + "learning_rate": 9.24109550229974e-05, + "loss": 0.97553349, + "memory(GiB)": 71.19, + "step": 2790, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.71132045, + "epoch": 2.19, + "learning_rate": 9.237657469968266e-05, + "loss": 0.92014847, + "memory(GiB)": 71.19, + "step": 2795, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69388194, + "epoch": 2.19, + "learning_rate": 9.234212310192711e-05, + "loss": 0.94383049, + "memory(GiB)": 71.19, + "step": 2800, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.69455805, + "epoch": 2.2, + "learning_rate": 9.230760028767597e-05, + "loss": 0.9620759, + "memory(GiB)": 71.19, + "step": 2805, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69068451, + "epoch": 2.2, + "learning_rate": 9.227300631499416e-05, + "loss": 0.94675779, + "memory(GiB)": 71.19, + "step": 2810, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69235501, + "epoch": 2.21, + "learning_rate": 9.223834124206635e-05, + "loss": 0.97269449, + "memory(GiB)": 71.19, + "step": 2815, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69006801, + "epoch": 2.21, + "learning_rate": 9.220360512719676e-05, + "loss": 0.95500069, + "memory(GiB)": 71.19, + "step": 2820, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69353352, + "epoch": 2.21, + "learning_rate": 9.216879802880913e-05, + "loss": 0.9617177, + "memory(GiB)": 71.19, + "step": 2825, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69113631, + "epoch": 2.22, + "learning_rate": 9.213392000544656e-05, + "loss": 0.97730999, + "memory(GiB)": 71.19, + "step": 2830, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69324679, + "epoch": 2.22, + "learning_rate": 9.209897111577144e-05, + "loss": 0.96714506, + "memory(GiB)": 71.19, + "step": 2835, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.696696, + "epoch": 2.23, + "learning_rate": 9.206395141856538e-05, + "loss": 0.98762465, + "memory(GiB)": 71.19, + "step": 2840, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70372343, + "epoch": 2.23, + "learning_rate": 9.202886097272907e-05, + "loss": 0.92046738, + "memory(GiB)": 71.19, + "step": 2845, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70415306, + "epoch": 2.23, + "learning_rate": 9.199369983728217e-05, + "loss": 0.92664623, + "memory(GiB)": 71.19, + "step": 2850, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69124141, + "epoch": 2.24, + "learning_rate": 9.195846807136326e-05, + "loss": 0.9966094, + "memory(GiB)": 71.19, + "step": 2855, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68278403, + "epoch": 2.24, + "learning_rate": 9.192316573422972e-05, + "loss": 1.00215902, + "memory(GiB)": 71.19, + "step": 2860, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70400138, + "epoch": 2.25, + "learning_rate": 9.188779288525761e-05, + "loss": 0.95005407, + "memory(GiB)": 71.19, + "step": 2865, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70103235, + "epoch": 2.25, + "learning_rate": 9.18523495839416e-05, + "loss": 0.93353863, + "memory(GiB)": 71.19, + "step": 2870, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69932871, + "epoch": 2.25, + "learning_rate": 9.181683588989485e-05, + "loss": 0.94103365, + "memory(GiB)": 71.19, + "step": 2875, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69865599, + "epoch": 2.26, + "learning_rate": 9.17812518628489e-05, + "loss": 0.96315451, + "memory(GiB)": 71.19, + "step": 2880, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69689775, + "epoch": 2.26, + "learning_rate": 9.174559756265361e-05, + "loss": 0.95352697, + "memory(GiB)": 71.19, + "step": 2885, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69051638, + "epoch": 2.27, + "learning_rate": 9.170987304927704e-05, + "loss": 0.97824144, + "memory(GiB)": 71.19, + "step": 2890, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70052414, + "epoch": 2.27, + "learning_rate": 9.167407838280531e-05, + "loss": 0.93514223, + "memory(GiB)": 71.19, + "step": 2895, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69622307, + "epoch": 2.27, + "learning_rate": 9.163821362344254e-05, + "loss": 0.96024828, + "memory(GiB)": 71.19, + "step": 2900, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68836212, + "epoch": 2.28, + "learning_rate": 9.160227883151077e-05, + "loss": 0.97077913, + "memory(GiB)": 71.19, + "step": 2905, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.6831625, + "epoch": 2.28, + "learning_rate": 9.15662740674498e-05, + "loss": 1.00156384, + "memory(GiB)": 71.19, + "step": 2910, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69906459, + "epoch": 2.28, + "learning_rate": 9.153019939181716e-05, + "loss": 0.96558504, + "memory(GiB)": 71.19, + "step": 2915, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69677444, + "epoch": 2.29, + "learning_rate": 9.149405486528788e-05, + "loss": 0.97523499, + "memory(GiB)": 71.19, + "step": 2920, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68966751, + "epoch": 2.29, + "learning_rate": 9.145784054865458e-05, + "loss": 0.97728767, + "memory(GiB)": 71.19, + "step": 2925, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.7036571, + "epoch": 2.3, + "learning_rate": 9.14215565028272e-05, + "loss": 0.94556446, + "memory(GiB)": 71.19, + "step": 2930, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69022222, + "epoch": 2.3, + "learning_rate": 9.138520278883297e-05, + "loss": 0.96314068, + "memory(GiB)": 71.19, + "step": 2935, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68896809, + "epoch": 2.3, + "learning_rate": 9.13487794678163e-05, + "loss": 0.99049673, + "memory(GiB)": 71.19, + "step": 2940, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69701419, + "epoch": 2.31, + "learning_rate": 9.131228660103866e-05, + "loss": 0.97968254, + "memory(GiB)": 71.19, + "step": 2945, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69701881, + "epoch": 2.31, + "learning_rate": 9.127572424987853e-05, + "loss": 0.97570438, + "memory(GiB)": 71.19, + "step": 2950, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70094166, + "epoch": 2.32, + "learning_rate": 9.12390924758312e-05, + "loss": 0.93014622, + "memory(GiB)": 71.19, + "step": 2955, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70081906, + "epoch": 2.32, + "learning_rate": 9.12023913405088e-05, + "loss": 0.97871437, + "memory(GiB)": 71.19, + "step": 2960, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70025148, + "epoch": 2.32, + "learning_rate": 9.116562090564005e-05, + "loss": 0.93432608, + "memory(GiB)": 71.19, + "step": 2965, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68864193, + "epoch": 2.33, + "learning_rate": 9.112878123307025e-05, + "loss": 0.98558102, + "memory(GiB)": 71.19, + "step": 2970, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69512825, + "epoch": 2.33, + "learning_rate": 9.109187238476116e-05, + "loss": 0.94346056, + "memory(GiB)": 71.19, + "step": 2975, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70853047, + "epoch": 2.34, + "learning_rate": 9.105489442279092e-05, + "loss": 0.91953535, + "memory(GiB)": 71.19, + "step": 2980, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70059996, + "epoch": 2.34, + "learning_rate": 9.101784740935383e-05, + "loss": 0.93242407, + "memory(GiB)": 71.19, + "step": 2985, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.6832633, + "epoch": 2.34, + "learning_rate": 9.098073140676042e-05, + "loss": 1.00155811, + "memory(GiB)": 71.19, + "step": 2990, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69065361, + "epoch": 2.35, + "learning_rate": 9.09435464774372e-05, + "loss": 0.98291121, + "memory(GiB)": 71.19, + "step": 2995, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69865141, + "epoch": 2.35, + "learning_rate": 9.090629268392661e-05, + "loss": 0.96651316, + "memory(GiB)": 71.19, + "step": 3000, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69779391, + "epoch": 2.36, + "learning_rate": 9.086897008888697e-05, + "loss": 0.96862831, + "memory(GiB)": 71.19, + "step": 3005, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.7000648, + "epoch": 2.36, + "learning_rate": 9.083157875509226e-05, + "loss": 0.95280199, + "memory(GiB)": 71.19, + "step": 3010, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69280767, + "epoch": 2.36, + "learning_rate": 9.079411874543206e-05, + "loss": 0.96452456, + "memory(GiB)": 71.19, + "step": 3015, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70206046, + "epoch": 2.37, + "learning_rate": 9.075659012291155e-05, + "loss": 0.94142323, + "memory(GiB)": 71.19, + "step": 3020, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69001818, + "epoch": 2.37, + "learning_rate": 9.071899295065122e-05, + "loss": 0.97127142, + "memory(GiB)": 71.19, + "step": 3025, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69132757, + "epoch": 2.37, + "learning_rate": 9.068132729188689e-05, + "loss": 0.95719604, + "memory(GiB)": 71.19, + "step": 3030, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69102788, + "epoch": 2.38, + "learning_rate": 9.064359320996958e-05, + "loss": 0.97239933, + "memory(GiB)": 71.19, + "step": 3035, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68073072, + "epoch": 2.38, + "learning_rate": 9.060579076836537e-05, + "loss": 1.00775642, + "memory(GiB)": 71.19, + "step": 3040, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69667606, + "epoch": 2.39, + "learning_rate": 9.056792003065535e-05, + "loss": 0.9632431, + "memory(GiB)": 71.19, + "step": 3045, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70922837, + "epoch": 2.39, + "learning_rate": 9.052998106053544e-05, + "loss": 0.91346598, + "memory(GiB)": 71.19, + "step": 3050, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68625507, + "epoch": 2.39, + "learning_rate": 9.049197392181632e-05, + "loss": 0.97347794, + "memory(GiB)": 71.19, + "step": 3055, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69933372, + "epoch": 2.4, + "learning_rate": 9.045389867842338e-05, + "loss": 0.97092943, + "memory(GiB)": 71.19, + "step": 3060, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70825005, + "epoch": 2.4, + "learning_rate": 9.041575539439651e-05, + "loss": 0.92969103, + "memory(GiB)": 71.19, + "step": 3065, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69653683, + "epoch": 2.41, + "learning_rate": 9.037754413389005e-05, + "loss": 0.96352663, + "memory(GiB)": 71.19, + "step": 3070, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69451375, + "epoch": 2.41, + "learning_rate": 9.033926496117268e-05, + "loss": 0.96977873, + "memory(GiB)": 71.19, + "step": 3075, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.687293, + "epoch": 2.41, + "learning_rate": 9.030091794062728e-05, + "loss": 0.98685484, + "memory(GiB)": 71.19, + "step": 3080, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69902458, + "epoch": 2.42, + "learning_rate": 9.026250313675086e-05, + "loss": 0.95378513, + "memory(GiB)": 71.19, + "step": 3085, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69570527, + "epoch": 2.42, + "learning_rate": 9.022402061415448e-05, + "loss": 0.94871998, + "memory(GiB)": 71.19, + "step": 3090, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69046903, + "epoch": 2.43, + "learning_rate": 9.018547043756299e-05, + "loss": 0.97162466, + "memory(GiB)": 71.19, + "step": 3095, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69036608, + "epoch": 2.43, + "learning_rate": 9.014685267181515e-05, + "loss": 0.97763748, + "memory(GiB)": 71.19, + "step": 3100, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69294906, + "epoch": 2.43, + "learning_rate": 9.010816738186335e-05, + "loss": 0.98672915, + "memory(GiB)": 71.19, + "step": 3105, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69603667, + "epoch": 2.44, + "learning_rate": 9.006941463277349e-05, + "loss": 0.96144562, + "memory(GiB)": 71.19, + "step": 3110, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69827685, + "epoch": 2.44, + "learning_rate": 9.003059448972504e-05, + "loss": 0.94170532, + "memory(GiB)": 71.19, + "step": 3115, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69537177, + "epoch": 2.45, + "learning_rate": 8.999170701801076e-05, + "loss": 0.94646502, + "memory(GiB)": 71.19, + "step": 3120, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69200006, + "epoch": 2.45, + "learning_rate": 8.995275228303667e-05, + "loss": 1.00812445, + "memory(GiB)": 71.19, + "step": 3125, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69033399, + "epoch": 2.45, + "learning_rate": 8.99137303503219e-05, + "loss": 0.99082174, + "memory(GiB)": 71.19, + "step": 3130, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69509125, + "epoch": 2.46, + "learning_rate": 8.987464128549862e-05, + "loss": 0.96717873, + "memory(GiB)": 71.19, + "step": 3135, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69468803, + "epoch": 2.46, + "learning_rate": 8.98354851543119e-05, + "loss": 0.93988543, + "memory(GiB)": 71.19, + "step": 3140, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70322733, + "epoch": 2.46, + "learning_rate": 8.979626202261966e-05, + "loss": 0.92626677, + "memory(GiB)": 71.19, + "step": 3145, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69417615, + "epoch": 2.47, + "learning_rate": 8.97569719563924e-05, + "loss": 0.95528698, + "memory(GiB)": 71.19, + "step": 3150, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69935951, + "epoch": 2.47, + "learning_rate": 8.971761502171335e-05, + "loss": 0.94430065, + "memory(GiB)": 71.19, + "step": 3155, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69067097, + "epoch": 2.48, + "learning_rate": 8.967819128477807e-05, + "loss": 0.97750225, + "memory(GiB)": 71.19, + "step": 3160, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69903126, + "epoch": 2.48, + "learning_rate": 8.963870081189454e-05, + "loss": 0.94778118, + "memory(GiB)": 71.19, + "step": 3165, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70578423, + "epoch": 2.48, + "learning_rate": 8.959914366948302e-05, + "loss": 0.92893019, + "memory(GiB)": 71.19, + "step": 3170, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69360132, + "epoch": 2.49, + "learning_rate": 8.95595199240758e-05, + "loss": 0.94542074, + "memory(GiB)": 71.19, + "step": 3175, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70106397, + "epoch": 2.49, + "learning_rate": 8.951982964231728e-05, + "loss": 0.94324751, + "memory(GiB)": 71.19, + "step": 3180, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70175576, + "epoch": 2.5, + "learning_rate": 8.948007289096379e-05, + "loss": 0.95341234, + "memory(GiB)": 71.19, + "step": 3185, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70615592, + "epoch": 2.5, + "learning_rate": 8.944024973688334e-05, + "loss": 0.92322388, + "memory(GiB)": 71.19, + "step": 3190, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70504413, + "epoch": 2.5, + "learning_rate": 8.940036024705574e-05, + "loss": 0.95310001, + "memory(GiB)": 71.19, + "step": 3195, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69084344, + "epoch": 2.51, + "learning_rate": 8.93604044885723e-05, + "loss": 0.97776327, + "memory(GiB)": 71.19, + "step": 3200, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.70420504, + "epoch": 2.51, + "learning_rate": 8.932038252863583e-05, + "loss": 0.9348093, + "memory(GiB)": 71.19, + "step": 3205, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69500103, + "epoch": 2.52, + "learning_rate": 8.92802944345605e-05, + "loss": 0.9558012, + "memory(GiB)": 71.19, + "step": 3210, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.71531296, + "epoch": 2.52, + "learning_rate": 8.924014027377164e-05, + "loss": 0.9156971, + "memory(GiB)": 71.19, + "step": 3215, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70045333, + "epoch": 2.52, + "learning_rate": 8.919992011380576e-05, + "loss": 0.93506889, + "memory(GiB)": 71.19, + "step": 3220, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68789282, + "epoch": 2.53, + "learning_rate": 8.915963402231038e-05, + "loss": 0.97745686, + "memory(GiB)": 71.19, + "step": 3225, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.7019794, + "epoch": 2.53, + "learning_rate": 8.911928206704387e-05, + "loss": 0.950805, + "memory(GiB)": 71.19, + "step": 3230, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69622092, + "epoch": 2.54, + "learning_rate": 8.907886431587543e-05, + "loss": 0.96481142, + "memory(GiB)": 71.19, + "step": 3235, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69668698, + "epoch": 2.54, + "learning_rate": 8.903838083678486e-05, + "loss": 0.95254927, + "memory(GiB)": 71.19, + "step": 3240, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70273995, + "epoch": 2.54, + "learning_rate": 8.899783169786257e-05, + "loss": 0.93340073, + "memory(GiB)": 71.19, + "step": 3245, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69511542, + "epoch": 2.55, + "learning_rate": 8.895721696730939e-05, + "loss": 0.96184444, + "memory(GiB)": 71.19, + "step": 3250, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70252714, + "epoch": 2.55, + "learning_rate": 8.891653671343643e-05, + "loss": 0.94699078, + "memory(GiB)": 71.19, + "step": 3255, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68809547, + "epoch": 2.56, + "learning_rate": 8.887579100466508e-05, + "loss": 0.96380692, + "memory(GiB)": 71.19, + "step": 3260, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68375864, + "epoch": 2.56, + "learning_rate": 8.883497990952674e-05, + "loss": 0.98428965, + "memory(GiB)": 71.19, + "step": 3265, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.68760471, + "epoch": 2.56, + "learning_rate": 8.879410349666284e-05, + "loss": 0.99547338, + "memory(GiB)": 71.19, + "step": 3270, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.6960813, + "epoch": 2.57, + "learning_rate": 8.875316183482464e-05, + "loss": 0.96283712, + "memory(GiB)": 71.19, + "step": 3275, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69431438, + "epoch": 2.57, + "learning_rate": 8.871215499287319e-05, + "loss": 0.97081881, + "memory(GiB)": 71.19, + "step": 3280, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69778886, + "epoch": 2.57, + "learning_rate": 8.867108303977912e-05, + "loss": 0.95300598, + "memory(GiB)": 71.19, + "step": 3285, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.70309801, + "epoch": 2.58, + "learning_rate": 8.862994604462256e-05, + "loss": 0.93476143, + "memory(GiB)": 71.19, + "step": 3290, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69512172, + "epoch": 2.58, + "learning_rate": 8.85887440765931e-05, + "loss": 0.96246347, + "memory(GiB)": 71.19, + "step": 3295, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.71056523, + "epoch": 2.59, + "learning_rate": 8.854747720498954e-05, + "loss": 0.90803175, + "memory(GiB)": 71.19, + "step": 3300, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69368334, + "epoch": 2.59, + "learning_rate": 8.850614549921994e-05, + "loss": 0.99619999, + "memory(GiB)": 71.19, + "step": 3305, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68741622, + "epoch": 2.59, + "learning_rate": 8.846474902880128e-05, + "loss": 1.01268291, + "memory(GiB)": 71.19, + "step": 3310, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68540864, + "epoch": 2.6, + "learning_rate": 8.842328786335956e-05, + "loss": 1.00386658, + "memory(GiB)": 71.19, + "step": 3315, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69587579, + "epoch": 2.6, + "learning_rate": 8.838176207262958e-05, + "loss": 0.95101843, + "memory(GiB)": 71.19, + "step": 3320, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69762154, + "epoch": 2.61, + "learning_rate": 8.834017172645478e-05, + "loss": 0.94804831, + "memory(GiB)": 71.19, + "step": 3325, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69709311, + "epoch": 2.61, + "learning_rate": 8.829851689478725e-05, + "loss": 0.99054155, + "memory(GiB)": 71.19, + "step": 3330, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.69299603, + "epoch": 2.61, + "learning_rate": 8.825679764768751e-05, + "loss": 0.95626192, + "memory(GiB)": 71.19, + "step": 3335, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69584079, + "epoch": 2.62, + "learning_rate": 8.821501405532442e-05, + "loss": 0.95285006, + "memory(GiB)": 71.19, + "step": 3340, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69174972, + "epoch": 2.62, + "learning_rate": 8.817316618797507e-05, + "loss": 0.95945415, + "memory(GiB)": 71.19, + "step": 3345, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69653916, + "epoch": 2.63, + "learning_rate": 8.813125411602464e-05, + "loss": 0.9533721, + "memory(GiB)": 71.19, + "step": 3350, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69422874, + "epoch": 2.63, + "learning_rate": 8.80892779099663e-05, + "loss": 0.96425648, + "memory(GiB)": 71.19, + "step": 3355, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.68755012, + "epoch": 2.63, + "learning_rate": 8.804723764040112e-05, + "loss": 0.97660484, + "memory(GiB)": 71.19, + "step": 3360, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.6888967, + "epoch": 2.64, + "learning_rate": 8.800513337803788e-05, + "loss": 0.95651054, + "memory(GiB)": 71.19, + "step": 3365, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69544282, + "epoch": 2.64, + "learning_rate": 8.796296519369303e-05, + "loss": 0.9503314, + "memory(GiB)": 71.19, + "step": 3370, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.69458733, + "epoch": 2.65, + "learning_rate": 8.792073315829052e-05, + "loss": 0.940135, + "memory(GiB)": 71.19, + "step": 3375, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69739385, + "epoch": 2.65, + "learning_rate": 8.787843734286165e-05, + "loss": 0.9286274, + "memory(GiB)": 71.19, + "step": 3380, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.71626267, + "epoch": 2.65, + "learning_rate": 8.783607781854506e-05, + "loss": 0.91786966, + "memory(GiB)": 71.19, + "step": 3385, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70292225, + "epoch": 2.66, + "learning_rate": 8.77936546565865e-05, + "loss": 0.94578028, + "memory(GiB)": 71.19, + "step": 3390, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70064354, + "epoch": 2.66, + "learning_rate": 8.775116792833878e-05, + "loss": 0.95401373, + "memory(GiB)": 71.19, + "step": 3395, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68651481, + "epoch": 2.66, + "learning_rate": 8.770861770526158e-05, + "loss": 0.96929026, + "memory(GiB)": 71.19, + "step": 3400, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.68540168, + "epoch": 2.67, + "learning_rate": 8.766600405892145e-05, + "loss": 1.01124077, + "memory(GiB)": 71.19, + "step": 3405, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70259171, + "epoch": 2.67, + "learning_rate": 8.762332706099153e-05, + "loss": 0.94904728, + "memory(GiB)": 71.19, + "step": 3410, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69669948, + "epoch": 2.68, + "learning_rate": 8.758058678325156e-05, + "loss": 0.94466686, + "memory(GiB)": 71.19, + "step": 3415, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.699084, + "epoch": 2.68, + "learning_rate": 8.753778329758773e-05, + "loss": 0.96061077, + "memory(GiB)": 71.19, + "step": 3420, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.70037179, + "epoch": 2.68, + "learning_rate": 8.74949166759925e-05, + "loss": 0.95514784, + "memory(GiB)": 71.19, + "step": 3425, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69292159, + "epoch": 2.69, + "learning_rate": 8.745198699056451e-05, + "loss": 0.98498917, + "memory(GiB)": 71.19, + "step": 3430, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70176616, + "epoch": 2.69, + "learning_rate": 8.740899431350852e-05, + "loss": 0.95132113, + "memory(GiB)": 71.19, + "step": 3435, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70519481, + "epoch": 2.7, + "learning_rate": 8.736593871713523e-05, + "loss": 0.95861454, + "memory(GiB)": 71.19, + "step": 3440, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.69500475, + "epoch": 2.7, + "learning_rate": 8.732282027386111e-05, + "loss": 0.96890717, + "memory(GiB)": 71.19, + "step": 3445, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69789162, + "epoch": 2.7, + "learning_rate": 8.72796390562084e-05, + "loss": 0.93789511, + "memory(GiB)": 71.19, + "step": 3450, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.6822947, + "epoch": 2.71, + "learning_rate": 8.723639513680486e-05, + "loss": 0.99033756, + "memory(GiB)": 71.19, + "step": 3455, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.70193062, + "epoch": 2.71, + "learning_rate": 8.719308858838377e-05, + "loss": 0.95832653, + "memory(GiB)": 71.19, + "step": 3460, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.70000944, + "epoch": 2.72, + "learning_rate": 8.714971948378374e-05, + "loss": 0.94106884, + "memory(GiB)": 71.19, + "step": 3465, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69467239, + "epoch": 2.72, + "learning_rate": 8.710628789594855e-05, + "loss": 0.97685585, + "memory(GiB)": 71.19, + "step": 3470, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69117222, + "epoch": 2.72, + "learning_rate": 8.706279389792708e-05, + "loss": 0.9753437, + "memory(GiB)": 71.19, + "step": 3475, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.7065135, + "epoch": 2.73, + "learning_rate": 8.701923756287325e-05, + "loss": 0.92029209, + "memory(GiB)": 71.19, + "step": 3480, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68631201, + "epoch": 2.73, + "learning_rate": 8.697561896404573e-05, + "loss": 0.97980461, + "memory(GiB)": 71.19, + "step": 3485, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69038978, + "epoch": 2.74, + "learning_rate": 8.693193817480798e-05, + "loss": 0.96484947, + "memory(GiB)": 71.19, + "step": 3490, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68747773, + "epoch": 2.74, + "learning_rate": 8.688819526862803e-05, + "loss": 0.97741976, + "memory(GiB)": 71.19, + "step": 3495, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69307537, + "epoch": 2.74, + "learning_rate": 8.684439031907843e-05, + "loss": 0.97401991, + "memory(GiB)": 71.19, + "step": 3500, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68593907, + "epoch": 2.75, + "learning_rate": 8.6800523399836e-05, + "loss": 0.98575306, + "memory(GiB)": 71.19, + "step": 3505, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.69273987, + "epoch": 2.75, + "learning_rate": 8.675659458468184e-05, + "loss": 0.97432585, + "memory(GiB)": 71.19, + "step": 3510, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69315062, + "epoch": 2.75, + "learning_rate": 8.671260394750119e-05, + "loss": 0.95112991, + "memory(GiB)": 71.19, + "step": 3515, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69168601, + "epoch": 2.76, + "learning_rate": 8.66685515622832e-05, + "loss": 0.98360157, + "memory(GiB)": 71.19, + "step": 3520, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69696198, + "epoch": 2.76, + "learning_rate": 8.66244375031209e-05, + "loss": 0.95998116, + "memory(GiB)": 71.19, + "step": 3525, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.70211124, + "epoch": 2.77, + "learning_rate": 8.658026184421108e-05, + "loss": 0.91666279, + "memory(GiB)": 71.19, + "step": 3530, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.70166306, + "epoch": 2.77, + "learning_rate": 8.653602465985411e-05, + "loss": 0.94730425, + "memory(GiB)": 71.19, + "step": 3535, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69360576, + "epoch": 2.77, + "learning_rate": 8.649172602445384e-05, + "loss": 0.96196995, + "memory(GiB)": 71.19, + "step": 3540, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69113541, + "epoch": 2.78, + "learning_rate": 8.644736601251749e-05, + "loss": 0.97629557, + "memory(GiB)": 71.19, + "step": 3545, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69766083, + "epoch": 2.78, + "learning_rate": 8.640294469865549e-05, + "loss": 0.97009659, + "memory(GiB)": 71.19, + "step": 3550, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.7054462, + "epoch": 2.79, + "learning_rate": 8.635846215758141e-05, + "loss": 0.94846039, + "memory(GiB)": 71.19, + "step": 3555, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68731637, + "epoch": 2.79, + "learning_rate": 8.631391846411177e-05, + "loss": 1.00905914, + "memory(GiB)": 71.19, + "step": 3560, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.7051527, + "epoch": 2.79, + "learning_rate": 8.626931369316594e-05, + "loss": 0.93751678, + "memory(GiB)": 71.19, + "step": 3565, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69043989, + "epoch": 2.8, + "learning_rate": 8.622464791976604e-05, + "loss": 0.97965469, + "memory(GiB)": 71.19, + "step": 3570, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.68530822, + "epoch": 2.8, + "learning_rate": 8.617992121903679e-05, + "loss": 1.03002996, + "memory(GiB)": 71.19, + "step": 3575, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69462538, + "epoch": 2.81, + "learning_rate": 8.613513366620538e-05, + "loss": 0.958325, + "memory(GiB)": 71.19, + "step": 3580, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.70790119, + "epoch": 2.81, + "learning_rate": 8.609028533660135e-05, + "loss": 0.91673079, + "memory(GiB)": 71.19, + "step": 3585, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69034023, + "epoch": 2.81, + "learning_rate": 8.604537630565644e-05, + "loss": 0.9714694, + "memory(GiB)": 71.19, + "step": 3590, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.69581952, + "epoch": 2.82, + "learning_rate": 8.600040664890453e-05, + "loss": 0.96100874, + "memory(GiB)": 71.19, + "step": 3595, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.70398974, + "epoch": 2.82, + "learning_rate": 8.595537644198141e-05, + "loss": 0.9286829, + "memory(GiB)": 71.19, + "step": 3600, + "train_speed(iter/s)": 0.024199 + }, + { + "acc": 0.68837748, + "epoch": 2.83, + "learning_rate": 8.591028576062478e-05, + "loss": 0.99620714, + "memory(GiB)": 71.19, + "step": 3605, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.69134026, + "epoch": 2.83, + "learning_rate": 8.586513468067404e-05, + "loss": 0.97660503, + "memory(GiB)": 71.19, + "step": 3610, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.69011774, + "epoch": 2.83, + "learning_rate": 8.581992327807007e-05, + "loss": 0.9757412, + "memory(GiB)": 71.19, + "step": 3615, + "train_speed(iter/s)": 0.0242 + }, + { + "acc": 0.6966249, + "epoch": 2.84, + "learning_rate": 8.577465162885538e-05, + "loss": 0.96154041, + "memory(GiB)": 71.19, + "step": 3620, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.70414891, + "epoch": 2.84, + "learning_rate": 8.572931980917366e-05, + "loss": 0.95618954, + "memory(GiB)": 71.19, + "step": 3625, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.69498696, + "epoch": 2.85, + "learning_rate": 8.568392789526991e-05, + "loss": 0.9771987, + "memory(GiB)": 71.19, + "step": 3630, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.69978442, + "epoch": 2.85, + "learning_rate": 8.563847596349015e-05, + "loss": 0.93518782, + "memory(GiB)": 71.19, + "step": 3635, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.70075121, + "epoch": 2.85, + "learning_rate": 8.559296409028134e-05, + "loss": 0.95782223, + "memory(GiB)": 71.19, + "step": 3640, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.69935665, + "epoch": 2.86, + "learning_rate": 8.554739235219129e-05, + "loss": 0.96709738, + "memory(GiB)": 71.19, + "step": 3645, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.70329223, + "epoch": 2.86, + "learning_rate": 8.550176082586847e-05, + "loss": 0.92794161, + "memory(GiB)": 71.19, + "step": 3650, + "train_speed(iter/s)": 0.024201 + }, + { + "acc": 0.69627366, + "epoch": 2.86, + "learning_rate": 8.545606958806195e-05, + "loss": 0.96420841, + "memory(GiB)": 71.19, + "step": 3655, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.69576621, + "epoch": 2.87, + "learning_rate": 8.541031871562118e-05, + "loss": 0.97613201, + "memory(GiB)": 71.19, + "step": 3660, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.69630737, + "epoch": 2.87, + "learning_rate": 8.536450828549593e-05, + "loss": 0.93979073, + "memory(GiB)": 71.19, + "step": 3665, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.68497229, + "epoch": 2.88, + "learning_rate": 8.531863837473617e-05, + "loss": 0.98675127, + "memory(GiB)": 71.19, + "step": 3670, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.68888111, + "epoch": 2.88, + "learning_rate": 8.527270906049185e-05, + "loss": 0.99011154, + "memory(GiB)": 71.19, + "step": 3675, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.70074887, + "epoch": 2.88, + "learning_rate": 8.522672042001291e-05, + "loss": 0.95177927, + "memory(GiB)": 71.19, + "step": 3680, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69165268, + "epoch": 2.89, + "learning_rate": 8.5180672530649e-05, + "loss": 0.97305288, + "memory(GiB)": 71.19, + "step": 3685, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69212561, + "epoch": 2.89, + "learning_rate": 8.51345654698495e-05, + "loss": 0.96181002, + "memory(GiB)": 71.19, + "step": 3690, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.7015995, + "epoch": 2.9, + "learning_rate": 8.508839931516322e-05, + "loss": 0.94027262, + "memory(GiB)": 71.19, + "step": 3695, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.6916369, + "epoch": 2.9, + "learning_rate": 8.504217414423843e-05, + "loss": 0.96168337, + "memory(GiB)": 71.19, + "step": 3700, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69469385, + "epoch": 2.9, + "learning_rate": 8.499589003482264e-05, + "loss": 0.96138353, + "memory(GiB)": 71.19, + "step": 3705, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69942169, + "epoch": 2.91, + "learning_rate": 8.49495470647625e-05, + "loss": 0.96778059, + "memory(GiB)": 71.19, + "step": 3710, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.69427686, + "epoch": 2.91, + "learning_rate": 8.490314531200365e-05, + "loss": 0.97816887, + "memory(GiB)": 71.19, + "step": 3715, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.7088768, + "epoch": 2.92, + "learning_rate": 8.485668485459057e-05, + "loss": 0.94817715, + "memory(GiB)": 71.19, + "step": 3720, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69802999, + "epoch": 2.92, + "learning_rate": 8.481016577066654e-05, + "loss": 0.97512226, + "memory(GiB)": 71.19, + "step": 3725, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.70213175, + "epoch": 2.92, + "learning_rate": 8.47635881384734e-05, + "loss": 0.94330521, + "memory(GiB)": 71.19, + "step": 3730, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.68532333, + "epoch": 2.93, + "learning_rate": 8.47169520363515e-05, + "loss": 0.96857815, + "memory(GiB)": 71.19, + "step": 3735, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.69682803, + "epoch": 2.93, + "learning_rate": 8.467025754273947e-05, + "loss": 0.98179436, + "memory(GiB)": 71.19, + "step": 3740, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.69457374, + "epoch": 2.94, + "learning_rate": 8.46235047361742e-05, + "loss": 0.97254868, + "memory(GiB)": 71.19, + "step": 3745, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.69419403, + "epoch": 2.94, + "learning_rate": 8.457669369529066e-05, + "loss": 0.95455761, + "memory(GiB)": 71.19, + "step": 3750, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.70040722, + "epoch": 2.94, + "learning_rate": 8.452982449882175e-05, + "loss": 0.92892199, + "memory(GiB)": 71.19, + "step": 3755, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.70409389, + "epoch": 2.95, + "learning_rate": 8.448289722559816e-05, + "loss": 0.93102856, + "memory(GiB)": 71.19, + "step": 3760, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69685011, + "epoch": 2.95, + "learning_rate": 8.443591195454834e-05, + "loss": 0.96960402, + "memory(GiB)": 71.19, + "step": 3765, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69049578, + "epoch": 2.95, + "learning_rate": 8.438886876469818e-05, + "loss": 0.9981617, + "memory(GiB)": 71.19, + "step": 3770, + "train_speed(iter/s)": 0.024204 + }, + { + "acc": 0.70177493, + "epoch": 2.96, + "learning_rate": 8.43417677351711e-05, + "loss": 0.91999292, + "memory(GiB)": 71.19, + "step": 3775, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69396958, + "epoch": 2.96, + "learning_rate": 8.429460894518771e-05, + "loss": 0.98489656, + "memory(GiB)": 71.19, + "step": 3780, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69775419, + "epoch": 2.97, + "learning_rate": 8.424739247406579e-05, + "loss": 0.95434799, + "memory(GiB)": 71.19, + "step": 3785, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.69874148, + "epoch": 2.97, + "learning_rate": 8.420011840122016e-05, + "loss": 0.94881783, + "memory(GiB)": 71.19, + "step": 3790, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.70053377, + "epoch": 2.97, + "learning_rate": 8.415278680616252e-05, + "loss": 0.95457497, + "memory(GiB)": 71.19, + "step": 3795, + "train_speed(iter/s)": 0.024203 + }, + { + "acc": 0.68822541, + "epoch": 2.98, + "learning_rate": 8.410539776850133e-05, + "loss": 0.97871866, + "memory(GiB)": 71.19, + "step": 3800, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.69143753, + "epoch": 2.98, + "learning_rate": 8.405795136794159e-05, + "loss": 0.9598156, + "memory(GiB)": 71.19, + "step": 3805, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.68940425, + "epoch": 2.99, + "learning_rate": 8.401044768428487e-05, + "loss": 0.98408442, + "memory(GiB)": 71.19, + "step": 3810, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.71102018, + "epoch": 2.99, + "learning_rate": 8.396288679742905e-05, + "loss": 0.91080713, + "memory(GiB)": 71.19, + "step": 3815, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.69605985, + "epoch": 2.99, + "learning_rate": 8.391526878736822e-05, + "loss": 0.96173496, + "memory(GiB)": 71.19, + "step": 3820, + "train_speed(iter/s)": 0.024202 + }, + { + "acc": 0.68129706, + "epoch": 3.0, + "learning_rate": 8.386759373419252e-05, + "loss": 1.0301609, + "memory(GiB)": 71.19, + "step": 3825, + "train_speed(iter/s)": 0.024203 + }, + { + "epoch": 3.0, + "eval_acc": 0.7299023290758828, + "eval_loss": 0.811944305896759, + "eval_runtime": 107.6655, + "eval_samples_per_second": 0.864, + "eval_steps_per_second": 0.864, + "step": 3827 + }, + { + "acc": 0.70716329, + "epoch": 3.0, + "learning_rate": 8.381986171808811e-05, + "loss": 0.90910578, + "memory(GiB)": 71.19, + "step": 3830, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.73387895, + "epoch": 3.01, + "learning_rate": 8.377207281933687e-05, + "loss": 0.82784376, + "memory(GiB)": 71.19, + "step": 3835, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.72814317, + "epoch": 3.01, + "learning_rate": 8.372422711831644e-05, + "loss": 0.83022747, + "memory(GiB)": 71.19, + "step": 3840, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.72625008, + "epoch": 3.01, + "learning_rate": 8.367632469549989e-05, + "loss": 0.82957611, + "memory(GiB)": 71.19, + "step": 3845, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.7326128, + "epoch": 3.02, + "learning_rate": 8.362836563145578e-05, + "loss": 0.82128582, + "memory(GiB)": 71.19, + "step": 3850, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.7478919, + "epoch": 3.02, + "learning_rate": 8.358035000684791e-05, + "loss": 0.77482996, + "memory(GiB)": 71.19, + "step": 3855, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.73634334, + "epoch": 3.03, + "learning_rate": 8.353227790243521e-05, + "loss": 0.804041, + "memory(GiB)": 71.19, + "step": 3860, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.73621793, + "epoch": 3.03, + "learning_rate": 8.34841493990716e-05, + "loss": 0.80979652, + "memory(GiB)": 71.19, + "step": 3865, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.7203692, + "epoch": 3.03, + "learning_rate": 8.343596457770587e-05, + "loss": 0.84263067, + "memory(GiB)": 71.19, + "step": 3870, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.71783953, + "epoch": 3.04, + "learning_rate": 8.338772351938148e-05, + "loss": 0.87986736, + "memory(GiB)": 71.19, + "step": 3875, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.74515467, + "epoch": 3.04, + "learning_rate": 8.333942630523662e-05, + "loss": 0.78914156, + "memory(GiB)": 71.19, + "step": 3880, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.71863422, + "epoch": 3.04, + "learning_rate": 8.329107301650374e-05, + "loss": 0.85929756, + "memory(GiB)": 71.19, + "step": 3885, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.73198423, + "epoch": 3.05, + "learning_rate": 8.324266373450974e-05, + "loss": 0.84285383, + "memory(GiB)": 71.19, + "step": 3890, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.71086202, + "epoch": 3.05, + "learning_rate": 8.319419854067564e-05, + "loss": 0.89417582, + "memory(GiB)": 71.19, + "step": 3895, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.73028541, + "epoch": 3.06, + "learning_rate": 8.314567751651654e-05, + "loss": 0.84059143, + "memory(GiB)": 71.19, + "step": 3900, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.73362865, + "epoch": 3.06, + "learning_rate": 8.309710074364138e-05, + "loss": 0.82103605, + "memory(GiB)": 71.19, + "step": 3905, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.74105625, + "epoch": 3.06, + "learning_rate": 8.304846830375292e-05, + "loss": 0.7890769, + "memory(GiB)": 71.19, + "step": 3910, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.73808637, + "epoch": 3.07, + "learning_rate": 8.299978027864752e-05, + "loss": 0.81043692, + "memory(GiB)": 71.19, + "step": 3915, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.71901126, + "epoch": 3.07, + "learning_rate": 8.295103675021505e-05, + "loss": 0.85230951, + "memory(GiB)": 71.19, + "step": 3920, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.7334156, + "epoch": 3.08, + "learning_rate": 8.290223780043874e-05, + "loss": 0.8069334, + "memory(GiB)": 71.19, + "step": 3925, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.72944226, + "epoch": 3.08, + "learning_rate": 8.285338351139496e-05, + "loss": 0.84712934, + "memory(GiB)": 71.19, + "step": 3930, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.73505478, + "epoch": 3.08, + "learning_rate": 8.280447396525328e-05, + "loss": 0.79626627, + "memory(GiB)": 71.19, + "step": 3935, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.72741809, + "epoch": 3.09, + "learning_rate": 8.275550924427609e-05, + "loss": 0.82367277, + "memory(GiB)": 71.19, + "step": 3940, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.72524977, + "epoch": 3.09, + "learning_rate": 8.270648943081867e-05, + "loss": 0.84687901, + "memory(GiB)": 71.19, + "step": 3945, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.73152761, + "epoch": 3.1, + "learning_rate": 8.26574146073289e-05, + "loss": 0.84828215, + "memory(GiB)": 71.19, + "step": 3950, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.72471776, + "epoch": 3.1, + "learning_rate": 8.260828485634722e-05, + "loss": 0.86218882, + "memory(GiB)": 71.19, + "step": 3955, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.7207871, + "epoch": 3.1, + "learning_rate": 8.255910026050643e-05, + "loss": 0.84007759, + "memory(GiB)": 71.19, + "step": 3960, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.73610811, + "epoch": 3.11, + "learning_rate": 8.250986090253163e-05, + "loss": 0.82142134, + "memory(GiB)": 71.19, + "step": 3965, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73013797, + "epoch": 3.11, + "learning_rate": 8.246056686523994e-05, + "loss": 0.83745384, + "memory(GiB)": 71.19, + "step": 3970, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73723321, + "epoch": 3.12, + "learning_rate": 8.241121823154047e-05, + "loss": 0.81523333, + "memory(GiB)": 71.19, + "step": 3975, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7233716, + "epoch": 3.12, + "learning_rate": 8.236181508443424e-05, + "loss": 0.83602371, + "memory(GiB)": 71.19, + "step": 3980, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.72590818, + "epoch": 3.12, + "learning_rate": 8.231235750701385e-05, + "loss": 0.8545907, + "memory(GiB)": 71.19, + "step": 3985, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.7300528, + "epoch": 3.13, + "learning_rate": 8.226284558246351e-05, + "loss": 0.82412634, + "memory(GiB)": 71.19, + "step": 3990, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.72257438, + "epoch": 3.13, + "learning_rate": 8.221327939405881e-05, + "loss": 0.85074911, + "memory(GiB)": 71.19, + "step": 3995, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73442616, + "epoch": 3.14, + "learning_rate": 8.216365902516664e-05, + "loss": 0.80528517, + "memory(GiB)": 71.19, + "step": 4000, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.72549343, + "epoch": 3.14, + "learning_rate": 8.211398455924497e-05, + "loss": 0.86022482, + "memory(GiB)": 71.19, + "step": 4005, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72123246, + "epoch": 3.14, + "learning_rate": 8.206425607984282e-05, + "loss": 0.86917925, + "memory(GiB)": 71.19, + "step": 4010, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73181915, + "epoch": 3.15, + "learning_rate": 8.201447367059998e-05, + "loss": 0.82419157, + "memory(GiB)": 71.19, + "step": 4015, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72473426, + "epoch": 3.15, + "learning_rate": 8.196463741524701e-05, + "loss": 0.85585718, + "memory(GiB)": 71.19, + "step": 4020, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72140322, + "epoch": 3.15, + "learning_rate": 8.1914747397605e-05, + "loss": 0.85199308, + "memory(GiB)": 71.19, + "step": 4025, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73175788, + "epoch": 3.16, + "learning_rate": 8.186480370158551e-05, + "loss": 0.85048962, + "memory(GiB)": 71.19, + "step": 4030, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72417698, + "epoch": 3.16, + "learning_rate": 8.181480641119032e-05, + "loss": 0.84959297, + "memory(GiB)": 71.19, + "step": 4035, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72861757, + "epoch": 3.17, + "learning_rate": 8.176475561051137e-05, + "loss": 0.83446856, + "memory(GiB)": 71.19, + "step": 4040, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73830481, + "epoch": 3.17, + "learning_rate": 8.171465138373067e-05, + "loss": 0.79196048, + "memory(GiB)": 71.19, + "step": 4045, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73525729, + "epoch": 3.17, + "learning_rate": 8.166449381511998e-05, + "loss": 0.80135098, + "memory(GiB)": 71.19, + "step": 4050, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72653871, + "epoch": 3.18, + "learning_rate": 8.161428298904085e-05, + "loss": 0.84726629, + "memory(GiB)": 71.19, + "step": 4055, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73992734, + "epoch": 3.18, + "learning_rate": 8.156401898994436e-05, + "loss": 0.80075979, + "memory(GiB)": 71.19, + "step": 4060, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7188323, + "epoch": 3.19, + "learning_rate": 8.151370190237108e-05, + "loss": 0.88397989, + "memory(GiB)": 71.19, + "step": 4065, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.73720374, + "epoch": 3.19, + "learning_rate": 8.146333181095084e-05, + "loss": 0.82127218, + "memory(GiB)": 71.19, + "step": 4070, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72629747, + "epoch": 3.19, + "learning_rate": 8.14129088004026e-05, + "loss": 0.86305199, + "memory(GiB)": 71.19, + "step": 4075, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.73932276, + "epoch": 3.2, + "learning_rate": 8.136243295553434e-05, + "loss": 0.82476034, + "memory(GiB)": 71.19, + "step": 4080, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72464457, + "epoch": 3.2, + "learning_rate": 8.131190436124294e-05, + "loss": 0.86362638, + "memory(GiB)": 71.19, + "step": 4085, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.72220616, + "epoch": 3.21, + "learning_rate": 8.126132310251393e-05, + "loss": 0.83337736, + "memory(GiB)": 71.19, + "step": 4090, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73724484, + "epoch": 3.21, + "learning_rate": 8.121068926442148e-05, + "loss": 0.82069883, + "memory(GiB)": 71.19, + "step": 4095, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7229681, + "epoch": 3.21, + "learning_rate": 8.116000293212815e-05, + "loss": 0.85246668, + "memory(GiB)": 71.19, + "step": 4100, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73643503, + "epoch": 3.22, + "learning_rate": 8.110926419088485e-05, + "loss": 0.83178596, + "memory(GiB)": 71.19, + "step": 4105, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73539133, + "epoch": 3.22, + "learning_rate": 8.105847312603056e-05, + "loss": 0.82203569, + "memory(GiB)": 71.19, + "step": 4110, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73065267, + "epoch": 3.23, + "learning_rate": 8.100762982299232e-05, + "loss": 0.79565067, + "memory(GiB)": 71.19, + "step": 4115, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72940273, + "epoch": 3.23, + "learning_rate": 8.095673436728504e-05, + "loss": 0.83768406, + "memory(GiB)": 71.19, + "step": 4120, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72964163, + "epoch": 3.23, + "learning_rate": 8.090578684451131e-05, + "loss": 0.84069805, + "memory(GiB)": 71.19, + "step": 4125, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73201489, + "epoch": 3.24, + "learning_rate": 8.085478734036129e-05, + "loss": 0.82388248, + "memory(GiB)": 71.19, + "step": 4130, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72238297, + "epoch": 3.24, + "learning_rate": 8.080373594061261e-05, + "loss": 0.85109663, + "memory(GiB)": 71.19, + "step": 4135, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.74465365, + "epoch": 3.24, + "learning_rate": 8.075263273113013e-05, + "loss": 0.78524365, + "memory(GiB)": 71.19, + "step": 4140, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71745272, + "epoch": 3.25, + "learning_rate": 8.070147779786593e-05, + "loss": 0.87544708, + "memory(GiB)": 71.19, + "step": 4145, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71826987, + "epoch": 3.25, + "learning_rate": 8.0650271226859e-05, + "loss": 0.8916872, + "memory(GiB)": 71.19, + "step": 4150, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72508607, + "epoch": 3.26, + "learning_rate": 8.05990131042352e-05, + "loss": 0.83632765, + "memory(GiB)": 71.19, + "step": 4155, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72774878, + "epoch": 3.26, + "learning_rate": 8.054770351620718e-05, + "loss": 0.84630947, + "memory(GiB)": 71.19, + "step": 4160, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71050835, + "epoch": 3.26, + "learning_rate": 8.049634254907404e-05, + "loss": 0.88207722, + "memory(GiB)": 71.19, + "step": 4165, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73926072, + "epoch": 3.27, + "learning_rate": 8.044493028922133e-05, + "loss": 0.80433979, + "memory(GiB)": 71.19, + "step": 4170, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7169281, + "epoch": 3.27, + "learning_rate": 8.03934668231209e-05, + "loss": 0.85596704, + "memory(GiB)": 71.19, + "step": 4175, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7407104, + "epoch": 3.28, + "learning_rate": 8.034195223733074e-05, + "loss": 0.80006132, + "memory(GiB)": 71.19, + "step": 4180, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72462053, + "epoch": 3.28, + "learning_rate": 8.029038661849472e-05, + "loss": 0.85594482, + "memory(GiB)": 71.19, + "step": 4185, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72585344, + "epoch": 3.28, + "learning_rate": 8.023877005334268e-05, + "loss": 0.84595318, + "memory(GiB)": 71.19, + "step": 4190, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7315063, + "epoch": 3.29, + "learning_rate": 8.018710262869005e-05, + "loss": 0.82566252, + "memory(GiB)": 71.19, + "step": 4195, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7300137, + "epoch": 3.29, + "learning_rate": 8.013538443143782e-05, + "loss": 0.8305336, + "memory(GiB)": 71.19, + "step": 4200, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72604132, + "epoch": 3.3, + "learning_rate": 8.008361554857237e-05, + "loss": 0.85777216, + "memory(GiB)": 71.19, + "step": 4205, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71913915, + "epoch": 3.3, + "learning_rate": 8.003179606716543e-05, + "loss": 0.85751801, + "memory(GiB)": 71.19, + "step": 4210, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71904831, + "epoch": 3.3, + "learning_rate": 7.997992607437365e-05, + "loss": 0.86428804, + "memory(GiB)": 71.19, + "step": 4215, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71747947, + "epoch": 3.31, + "learning_rate": 7.992800565743882e-05, + "loss": 0.86572142, + "memory(GiB)": 71.19, + "step": 4220, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72012987, + "epoch": 3.31, + "learning_rate": 7.987603490368741e-05, + "loss": 0.85723791, + "memory(GiB)": 71.19, + "step": 4225, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73201785, + "epoch": 3.32, + "learning_rate": 7.98240139005306e-05, + "loss": 0.8323741, + "memory(GiB)": 71.19, + "step": 4230, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72473407, + "epoch": 3.32, + "learning_rate": 7.977194273546411e-05, + "loss": 0.85458097, + "memory(GiB)": 71.19, + "step": 4235, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73437066, + "epoch": 3.32, + "learning_rate": 7.971982149606799e-05, + "loss": 0.80146246, + "memory(GiB)": 71.19, + "step": 4240, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71682711, + "epoch": 3.33, + "learning_rate": 7.966765027000654e-05, + "loss": 0.86212845, + "memory(GiB)": 71.19, + "step": 4245, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72666965, + "epoch": 3.33, + "learning_rate": 7.961542914502808e-05, + "loss": 0.84145832, + "memory(GiB)": 71.19, + "step": 4250, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72355342, + "epoch": 3.33, + "learning_rate": 7.956315820896496e-05, + "loss": 0.85306997, + "memory(GiB)": 71.19, + "step": 4255, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73941717, + "epoch": 3.34, + "learning_rate": 7.951083754973321e-05, + "loss": 0.82038488, + "memory(GiB)": 71.19, + "step": 4260, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71963987, + "epoch": 3.34, + "learning_rate": 7.945846725533251e-05, + "loss": 0.8572114, + "memory(GiB)": 71.19, + "step": 4265, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73555474, + "epoch": 3.35, + "learning_rate": 7.940604741384607e-05, + "loss": 0.82101889, + "memory(GiB)": 71.19, + "step": 4270, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.74150233, + "epoch": 3.35, + "learning_rate": 7.935357811344042e-05, + "loss": 0.80582638, + "memory(GiB)": 71.19, + "step": 4275, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72068758, + "epoch": 3.35, + "learning_rate": 7.93010594423652e-05, + "loss": 0.8544672, + "memory(GiB)": 71.19, + "step": 4280, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73440638, + "epoch": 3.36, + "learning_rate": 7.924849148895321e-05, + "loss": 0.80622263, + "memory(GiB)": 71.19, + "step": 4285, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72935481, + "epoch": 3.36, + "learning_rate": 7.919587434162004e-05, + "loss": 0.82818518, + "memory(GiB)": 71.19, + "step": 4290, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72351742, + "epoch": 3.37, + "learning_rate": 7.914320808886409e-05, + "loss": 0.85957403, + "memory(GiB)": 71.19, + "step": 4295, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7248076, + "epoch": 3.37, + "learning_rate": 7.909049281926629e-05, + "loss": 0.8453124, + "memory(GiB)": 71.19, + "step": 4300, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72555737, + "epoch": 3.37, + "learning_rate": 7.903772862149004e-05, + "loss": 0.8440258, + "memory(GiB)": 71.19, + "step": 4305, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72598963, + "epoch": 3.38, + "learning_rate": 7.898491558428108e-05, + "loss": 0.83704453, + "memory(GiB)": 71.19, + "step": 4310, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71948795, + "epoch": 3.38, + "learning_rate": 7.893205379646724e-05, + "loss": 0.86024466, + "memory(GiB)": 71.19, + "step": 4315, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73115282, + "epoch": 3.39, + "learning_rate": 7.887914334695831e-05, + "loss": 0.83252287, + "memory(GiB)": 71.19, + "step": 4320, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72599154, + "epoch": 3.39, + "learning_rate": 7.882618432474604e-05, + "loss": 0.83888359, + "memory(GiB)": 71.19, + "step": 4325, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73242888, + "epoch": 3.39, + "learning_rate": 7.877317681890376e-05, + "loss": 0.81837931, + "memory(GiB)": 71.19, + "step": 4330, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.742377, + "epoch": 3.4, + "learning_rate": 7.87201209185864e-05, + "loss": 0.80096016, + "memory(GiB)": 71.19, + "step": 4335, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7232306, + "epoch": 3.4, + "learning_rate": 7.866701671303032e-05, + "loss": 0.86032343, + "memory(GiB)": 71.19, + "step": 4340, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72954855, + "epoch": 3.41, + "learning_rate": 7.861386429155304e-05, + "loss": 0.82318716, + "memory(GiB)": 71.19, + "step": 4345, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71807418, + "epoch": 3.41, + "learning_rate": 7.856066374355326e-05, + "loss": 0.88005505, + "memory(GiB)": 71.19, + "step": 4350, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73638568, + "epoch": 3.41, + "learning_rate": 7.850741515851057e-05, + "loss": 0.81349354, + "memory(GiB)": 71.19, + "step": 4355, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73490205, + "epoch": 3.42, + "learning_rate": 7.845411862598537e-05, + "loss": 0.83284054, + "memory(GiB)": 71.19, + "step": 4360, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73105078, + "epoch": 3.42, + "learning_rate": 7.840077423561871e-05, + "loss": 0.84245071, + "memory(GiB)": 71.19, + "step": 4365, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7196713, + "epoch": 3.43, + "learning_rate": 7.834738207713213e-05, + "loss": 0.86376743, + "memory(GiB)": 71.19, + "step": 4370, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72638869, + "epoch": 3.43, + "learning_rate": 7.829394224032753e-05, + "loss": 0.82913513, + "memory(GiB)": 71.19, + "step": 4375, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73445344, + "epoch": 3.43, + "learning_rate": 7.824045481508696e-05, + "loss": 0.8318285, + "memory(GiB)": 71.19, + "step": 4380, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7262754, + "epoch": 3.44, + "learning_rate": 7.818691989137255e-05, + "loss": 0.84009447, + "memory(GiB)": 71.19, + "step": 4385, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7386354, + "epoch": 3.44, + "learning_rate": 7.813333755922632e-05, + "loss": 0.81151896, + "memory(GiB)": 71.19, + "step": 4390, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72992892, + "epoch": 3.44, + "learning_rate": 7.807970790876997e-05, + "loss": 0.81546345, + "memory(GiB)": 71.19, + "step": 4395, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73798876, + "epoch": 3.45, + "learning_rate": 7.802603103020487e-05, + "loss": 0.80516491, + "memory(GiB)": 71.19, + "step": 4400, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72618403, + "epoch": 3.45, + "learning_rate": 7.797230701381177e-05, + "loss": 0.83551912, + "memory(GiB)": 71.19, + "step": 4405, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71417842, + "epoch": 3.46, + "learning_rate": 7.791853594995072e-05, + "loss": 0.84696274, + "memory(GiB)": 71.19, + "step": 4410, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7323916, + "epoch": 3.46, + "learning_rate": 7.78647179290609e-05, + "loss": 0.82821932, + "memory(GiB)": 71.19, + "step": 4415, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72546659, + "epoch": 3.46, + "learning_rate": 7.781085304166042e-05, + "loss": 0.84084616, + "memory(GiB)": 71.19, + "step": 4420, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73983116, + "epoch": 3.47, + "learning_rate": 7.775694137834632e-05, + "loss": 0.80149288, + "memory(GiB)": 71.19, + "step": 4425, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72577829, + "epoch": 3.47, + "learning_rate": 7.770298302979421e-05, + "loss": 0.85075827, + "memory(GiB)": 71.19, + "step": 4430, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.731323, + "epoch": 3.48, + "learning_rate": 7.764897808675831e-05, + "loss": 0.83516245, + "memory(GiB)": 71.19, + "step": 4435, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72064219, + "epoch": 3.48, + "learning_rate": 7.759492664007114e-05, + "loss": 0.87227879, + "memory(GiB)": 71.19, + "step": 4440, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73189783, + "epoch": 3.48, + "learning_rate": 7.754082878064346e-05, + "loss": 0.82408819, + "memory(GiB)": 71.19, + "step": 4445, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7397872, + "epoch": 3.49, + "learning_rate": 7.748668459946408e-05, + "loss": 0.8035758, + "memory(GiB)": 71.19, + "step": 4450, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73634014, + "epoch": 3.49, + "learning_rate": 7.743249418759976e-05, + "loss": 0.8225769, + "memory(GiB)": 71.19, + "step": 4455, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72766705, + "epoch": 3.5, + "learning_rate": 7.7378257636195e-05, + "loss": 0.83043652, + "memory(GiB)": 71.19, + "step": 4460, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72869205, + "epoch": 3.5, + "learning_rate": 7.732397503647184e-05, + "loss": 0.83498526, + "memory(GiB)": 71.19, + "step": 4465, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71786594, + "epoch": 3.5, + "learning_rate": 7.726964647972988e-05, + "loss": 0.86003532, + "memory(GiB)": 71.19, + "step": 4470, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73369732, + "epoch": 3.51, + "learning_rate": 7.721527205734593e-05, + "loss": 0.82165928, + "memory(GiB)": 71.19, + "step": 4475, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71339808, + "epoch": 3.51, + "learning_rate": 7.716085186077398e-05, + "loss": 0.86701927, + "memory(GiB)": 71.19, + "step": 4480, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.72527127, + "epoch": 3.52, + "learning_rate": 7.7106385981545e-05, + "loss": 0.8485733, + "memory(GiB)": 71.19, + "step": 4485, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.71741381, + "epoch": 3.52, + "learning_rate": 7.70518745112668e-05, + "loss": 0.87526913, + "memory(GiB)": 71.19, + "step": 4490, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.73170438, + "epoch": 3.52, + "learning_rate": 7.699731754162388e-05, + "loss": 0.84059429, + "memory(GiB)": 71.19, + "step": 4495, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72498207, + "epoch": 3.53, + "learning_rate": 7.694271516437723e-05, + "loss": 0.84459229, + "memory(GiB)": 71.19, + "step": 4500, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7239903, + "epoch": 3.53, + "learning_rate": 7.688806747136426e-05, + "loss": 0.84521217, + "memory(GiB)": 71.19, + "step": 4505, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73728113, + "epoch": 3.53, + "learning_rate": 7.683337455449856e-05, + "loss": 0.82545519, + "memory(GiB)": 71.19, + "step": 4510, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72692027, + "epoch": 3.54, + "learning_rate": 7.677863650576979e-05, + "loss": 0.84334087, + "memory(GiB)": 71.19, + "step": 4515, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72908607, + "epoch": 3.54, + "learning_rate": 7.672385341724355e-05, + "loss": 0.82269535, + "memory(GiB)": 71.19, + "step": 4520, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72978554, + "epoch": 3.55, + "learning_rate": 7.666902538106118e-05, + "loss": 0.84637251, + "memory(GiB)": 71.19, + "step": 4525, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.71525011, + "epoch": 3.55, + "learning_rate": 7.661415248943958e-05, + "loss": 0.87230482, + "memory(GiB)": 71.19, + "step": 4530, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72793851, + "epoch": 3.55, + "learning_rate": 7.655923483467114e-05, + "loss": 0.81432896, + "memory(GiB)": 71.19, + "step": 4535, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.73044486, + "epoch": 3.56, + "learning_rate": 7.650427250912351e-05, + "loss": 0.84025393, + "memory(GiB)": 71.19, + "step": 4540, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72327971, + "epoch": 3.56, + "learning_rate": 7.644926560523952e-05, + "loss": 0.84944382, + "memory(GiB)": 71.19, + "step": 4545, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72713952, + "epoch": 3.57, + "learning_rate": 7.639421421553687e-05, + "loss": 0.83744144, + "memory(GiB)": 71.19, + "step": 4550, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7387239, + "epoch": 3.57, + "learning_rate": 7.633911843260825e-05, + "loss": 0.82736578, + "memory(GiB)": 71.19, + "step": 4555, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.72111745, + "epoch": 3.57, + "learning_rate": 7.628397834912085e-05, + "loss": 0.8412631, + "memory(GiB)": 71.19, + "step": 4560, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.71875992, + "epoch": 3.58, + "learning_rate": 7.622879405781645e-05, + "loss": 0.85906324, + "memory(GiB)": 71.19, + "step": 4565, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72346978, + "epoch": 3.58, + "learning_rate": 7.617356565151122e-05, + "loss": 0.84273167, + "memory(GiB)": 71.19, + "step": 4570, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72537384, + "epoch": 3.59, + "learning_rate": 7.611829322309544e-05, + "loss": 0.86372871, + "memory(GiB)": 71.19, + "step": 4575, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72613001, + "epoch": 3.59, + "learning_rate": 7.60629768655335e-05, + "loss": 0.83706751, + "memory(GiB)": 71.19, + "step": 4580, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72427754, + "epoch": 3.59, + "learning_rate": 7.600761667186362e-05, + "loss": 0.83796959, + "memory(GiB)": 71.19, + "step": 4585, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.73405333, + "epoch": 3.6, + "learning_rate": 7.595221273519783e-05, + "loss": 0.84556036, + "memory(GiB)": 71.19, + "step": 4590, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72501197, + "epoch": 3.6, + "learning_rate": 7.589676514872165e-05, + "loss": 0.83936701, + "memory(GiB)": 71.19, + "step": 4595, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.71823664, + "epoch": 3.61, + "learning_rate": 7.584127400569408e-05, + "loss": 0.85496521, + "memory(GiB)": 71.19, + "step": 4600, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72726426, + "epoch": 3.61, + "learning_rate": 7.57857393994473e-05, + "loss": 0.84609032, + "memory(GiB)": 71.19, + "step": 4605, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72315588, + "epoch": 3.61, + "learning_rate": 7.573016142338668e-05, + "loss": 0.85114698, + "memory(GiB)": 71.19, + "step": 4610, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.71871891, + "epoch": 3.62, + "learning_rate": 7.56745401709905e-05, + "loss": 0.8591588, + "memory(GiB)": 71.19, + "step": 4615, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73322353, + "epoch": 3.62, + "learning_rate": 7.56188757358098e-05, + "loss": 0.83172417, + "memory(GiB)": 71.19, + "step": 4620, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72749004, + "epoch": 3.62, + "learning_rate": 7.55631682114683e-05, + "loss": 0.84570827, + "memory(GiB)": 71.19, + "step": 4625, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73905587, + "epoch": 3.63, + "learning_rate": 7.550741769166214e-05, + "loss": 0.80478392, + "memory(GiB)": 71.19, + "step": 4630, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.73352895, + "epoch": 3.63, + "learning_rate": 7.545162427015981e-05, + "loss": 0.81894255, + "memory(GiB)": 71.19, + "step": 4635, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.73099337, + "epoch": 3.64, + "learning_rate": 7.539578804080198e-05, + "loss": 0.84308519, + "memory(GiB)": 71.19, + "step": 4640, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.71498265, + "epoch": 3.64, + "learning_rate": 7.533990909750125e-05, + "loss": 0.87364149, + "memory(GiB)": 71.19, + "step": 4645, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.71505318, + "epoch": 3.64, + "learning_rate": 7.528398753424213e-05, + "loss": 0.86194353, + "memory(GiB)": 71.19, + "step": 4650, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7217392, + "epoch": 3.65, + "learning_rate": 7.522802344508078e-05, + "loss": 0.8368371, + "memory(GiB)": 71.19, + "step": 4655, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.74326339, + "epoch": 3.65, + "learning_rate": 7.517201692414488e-05, + "loss": 0.79576321, + "memory(GiB)": 71.19, + "step": 4660, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72128105, + "epoch": 3.66, + "learning_rate": 7.51159680656335e-05, + "loss": 0.84689484, + "memory(GiB)": 71.19, + "step": 4665, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.73218131, + "epoch": 3.66, + "learning_rate": 7.505987696381691e-05, + "loss": 0.80757494, + "memory(GiB)": 71.19, + "step": 4670, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72402844, + "epoch": 3.66, + "learning_rate": 7.500374371303643e-05, + "loss": 0.83772345, + "memory(GiB)": 71.19, + "step": 4675, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73203745, + "epoch": 3.67, + "learning_rate": 7.494756840770425e-05, + "loss": 0.82900801, + "memory(GiB)": 71.19, + "step": 4680, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73576927, + "epoch": 3.67, + "learning_rate": 7.489135114230333e-05, + "loss": 0.82100172, + "memory(GiB)": 71.19, + "step": 4685, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72248497, + "epoch": 3.68, + "learning_rate": 7.483509201138717e-05, + "loss": 0.83817987, + "memory(GiB)": 71.19, + "step": 4690, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72471027, + "epoch": 3.68, + "learning_rate": 7.477879110957972e-05, + "loss": 0.83272848, + "memory(GiB)": 71.19, + "step": 4695, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72231956, + "epoch": 3.68, + "learning_rate": 7.472244853157517e-05, + "loss": 0.85366192, + "memory(GiB)": 71.19, + "step": 4700, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72966657, + "epoch": 3.69, + "learning_rate": 7.46660643721378e-05, + "loss": 0.84410038, + "memory(GiB)": 71.19, + "step": 4705, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.71471176, + "epoch": 3.69, + "learning_rate": 7.460963872610181e-05, + "loss": 0.86216116, + "memory(GiB)": 71.19, + "step": 4710, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.73702974, + "epoch": 3.7, + "learning_rate": 7.455317168837122e-05, + "loss": 0.83383703, + "memory(GiB)": 71.19, + "step": 4715, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.70991087, + "epoch": 3.7, + "learning_rate": 7.449666335391963e-05, + "loss": 0.8940443, + "memory(GiB)": 71.19, + "step": 4720, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.71537218, + "epoch": 3.7, + "learning_rate": 7.444011381779013e-05, + "loss": 0.86577148, + "memory(GiB)": 71.19, + "step": 4725, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72070432, + "epoch": 3.71, + "learning_rate": 7.438352317509508e-05, + "loss": 0.84208603, + "memory(GiB)": 71.19, + "step": 4730, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73044333, + "epoch": 3.71, + "learning_rate": 7.4326891521016e-05, + "loss": 0.82190485, + "memory(GiB)": 71.19, + "step": 4735, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72588086, + "epoch": 3.72, + "learning_rate": 7.427021895080339e-05, + "loss": 0.85073795, + "memory(GiB)": 71.19, + "step": 4740, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7267787, + "epoch": 3.72, + "learning_rate": 7.421350555977653e-05, + "loss": 0.83103905, + "memory(GiB)": 71.19, + "step": 4745, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72804618, + "epoch": 3.72, + "learning_rate": 7.415675144332339e-05, + "loss": 0.83636494, + "memory(GiB)": 71.19, + "step": 4750, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72480588, + "epoch": 3.73, + "learning_rate": 7.409995669690046e-05, + "loss": 0.84966698, + "memory(GiB)": 71.19, + "step": 4755, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.72015243, + "epoch": 3.73, + "learning_rate": 7.404312141603251e-05, + "loss": 0.86545382, + "memory(GiB)": 71.19, + "step": 4760, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.7205214, + "epoch": 3.73, + "learning_rate": 7.398624569631254e-05, + "loss": 0.86042595, + "memory(GiB)": 71.19, + "step": 4765, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.734021, + "epoch": 3.74, + "learning_rate": 7.392932963340151e-05, + "loss": 0.82725487, + "memory(GiB)": 71.19, + "step": 4770, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.71987934, + "epoch": 3.74, + "learning_rate": 7.38723733230283e-05, + "loss": 0.85444155, + "memory(GiB)": 71.19, + "step": 4775, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7245378, + "epoch": 3.75, + "learning_rate": 7.381537686098942e-05, + "loss": 0.85145273, + "memory(GiB)": 71.19, + "step": 4780, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.71786847, + "epoch": 3.75, + "learning_rate": 7.375834034314895e-05, + "loss": 0.88409557, + "memory(GiB)": 71.19, + "step": 4785, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.71265817, + "epoch": 3.75, + "learning_rate": 7.370126386543833e-05, + "loss": 0.89585934, + "memory(GiB)": 71.19, + "step": 4790, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7173708, + "epoch": 3.76, + "learning_rate": 7.364414752385622e-05, + "loss": 0.88221331, + "memory(GiB)": 71.19, + "step": 4795, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73161063, + "epoch": 3.76, + "learning_rate": 7.358699141446833e-05, + "loss": 0.8258852, + "memory(GiB)": 71.19, + "step": 4800, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72865162, + "epoch": 3.77, + "learning_rate": 7.35297956334072e-05, + "loss": 0.84000244, + "memory(GiB)": 71.19, + "step": 4805, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72472777, + "epoch": 3.77, + "learning_rate": 7.34725602768722e-05, + "loss": 0.84447889, + "memory(GiB)": 71.19, + "step": 4810, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7250689, + "epoch": 3.77, + "learning_rate": 7.341528544112915e-05, + "loss": 0.866607, + "memory(GiB)": 71.19, + "step": 4815, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.72776718, + "epoch": 3.78, + "learning_rate": 7.335797122251038e-05, + "loss": 0.84377356, + "memory(GiB)": 71.19, + "step": 4820, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.72724218, + "epoch": 3.78, + "learning_rate": 7.330061771741436e-05, + "loss": 0.85711832, + "memory(GiB)": 71.19, + "step": 4825, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.72393856, + "epoch": 3.79, + "learning_rate": 7.324322502230571e-05, + "loss": 0.8321475, + "memory(GiB)": 71.19, + "step": 4830, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73015528, + "epoch": 3.79, + "learning_rate": 7.318579323371493e-05, + "loss": 0.84410172, + "memory(GiB)": 71.19, + "step": 4835, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73202195, + "epoch": 3.79, + "learning_rate": 7.312832244823827e-05, + "loss": 0.81751471, + "memory(GiB)": 71.19, + "step": 4840, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.71658878, + "epoch": 3.8, + "learning_rate": 7.307081276253761e-05, + "loss": 0.87323961, + "memory(GiB)": 71.19, + "step": 4845, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73308387, + "epoch": 3.8, + "learning_rate": 7.301326427334019e-05, + "loss": 0.82617254, + "memory(GiB)": 71.19, + "step": 4850, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.725809, + "epoch": 3.81, + "learning_rate": 7.295567707743856e-05, + "loss": 0.8599144, + "memory(GiB)": 71.19, + "step": 4855, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.71556239, + "epoch": 3.81, + "learning_rate": 7.289805127169038e-05, + "loss": 0.8742794, + "memory(GiB)": 71.19, + "step": 4860, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73000875, + "epoch": 3.81, + "learning_rate": 7.284038695301823e-05, + "loss": 0.83999195, + "memory(GiB)": 71.19, + "step": 4865, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72756143, + "epoch": 3.82, + "learning_rate": 7.278268421840944e-05, + "loss": 0.84345179, + "memory(GiB)": 71.19, + "step": 4870, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72185001, + "epoch": 3.82, + "learning_rate": 7.272494316491602e-05, + "loss": 0.84758148, + "memory(GiB)": 71.19, + "step": 4875, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72773247, + "epoch": 3.82, + "learning_rate": 7.266716388965437e-05, + "loss": 0.83907547, + "memory(GiB)": 71.19, + "step": 4880, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7324759, + "epoch": 3.83, + "learning_rate": 7.260934648980521e-05, + "loss": 0.81709414, + "memory(GiB)": 71.19, + "step": 4885, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73284607, + "epoch": 3.83, + "learning_rate": 7.255149106261339e-05, + "loss": 0.81797581, + "memory(GiB)": 71.19, + "step": 4890, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72334728, + "epoch": 3.84, + "learning_rate": 7.249359770538764e-05, + "loss": 0.83954468, + "memory(GiB)": 71.19, + "step": 4895, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72269335, + "epoch": 3.84, + "learning_rate": 7.24356665155006e-05, + "loss": 0.84613752, + "memory(GiB)": 71.19, + "step": 4900, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72381783, + "epoch": 3.84, + "learning_rate": 7.237769759038846e-05, + "loss": 0.83016071, + "memory(GiB)": 71.19, + "step": 4905, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72248049, + "epoch": 3.85, + "learning_rate": 7.231969102755093e-05, + "loss": 0.85668192, + "memory(GiB)": 71.19, + "step": 4910, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73048677, + "epoch": 3.85, + "learning_rate": 7.226164692455098e-05, + "loss": 0.84103241, + "memory(GiB)": 71.19, + "step": 4915, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72759271, + "epoch": 3.86, + "learning_rate": 7.220356537901474e-05, + "loss": 0.82246981, + "memory(GiB)": 71.19, + "step": 4920, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72148042, + "epoch": 3.86, + "learning_rate": 7.214544648863131e-05, + "loss": 0.85546913, + "memory(GiB)": 71.19, + "step": 4925, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72802148, + "epoch": 3.86, + "learning_rate": 7.208729035115264e-05, + "loss": 0.84229441, + "memory(GiB)": 71.19, + "step": 4930, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73225174, + "epoch": 3.87, + "learning_rate": 7.202909706439326e-05, + "loss": 0.82962236, + "memory(GiB)": 71.19, + "step": 4935, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72838197, + "epoch": 3.87, + "learning_rate": 7.197086672623023e-05, + "loss": 0.84410496, + "memory(GiB)": 71.19, + "step": 4940, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72434216, + "epoch": 3.88, + "learning_rate": 7.191259943460292e-05, + "loss": 0.85847845, + "memory(GiB)": 71.19, + "step": 4945, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.71965685, + "epoch": 3.88, + "learning_rate": 7.185429528751285e-05, + "loss": 0.85441608, + "memory(GiB)": 71.19, + "step": 4950, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.713869, + "epoch": 3.88, + "learning_rate": 7.179595438302348e-05, + "loss": 0.8926239, + "memory(GiB)": 71.19, + "step": 4955, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72364521, + "epoch": 3.89, + "learning_rate": 7.173757681926021e-05, + "loss": 0.84683857, + "memory(GiB)": 71.19, + "step": 4960, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.73327866, + "epoch": 3.89, + "learning_rate": 7.167916269440998e-05, + "loss": 0.8079627, + "memory(GiB)": 71.19, + "step": 4965, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7266232, + "epoch": 3.9, + "learning_rate": 7.162071210672128e-05, + "loss": 0.83985033, + "memory(GiB)": 71.19, + "step": 4970, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72303486, + "epoch": 3.9, + "learning_rate": 7.156222515450393e-05, + "loss": 0.85938587, + "memory(GiB)": 71.19, + "step": 4975, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72731833, + "epoch": 3.9, + "learning_rate": 7.150370193612889e-05, + "loss": 0.81108408, + "memory(GiB)": 71.19, + "step": 4980, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.72412114, + "epoch": 3.91, + "learning_rate": 7.144514255002813e-05, + "loss": 0.86580133, + "memory(GiB)": 71.19, + "step": 4985, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73388848, + "epoch": 3.91, + "learning_rate": 7.138654709469446e-05, + "loss": 0.82623234, + "memory(GiB)": 71.19, + "step": 4990, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73632493, + "epoch": 3.91, + "learning_rate": 7.132791566868133e-05, + "loss": 0.83154993, + "memory(GiB)": 71.19, + "step": 4995, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.71437707, + "epoch": 3.92, + "learning_rate": 7.126924837060271e-05, + "loss": 0.88094139, + "memory(GiB)": 71.19, + "step": 5000, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.71930361, + "epoch": 3.92, + "learning_rate": 7.121054529913292e-05, + "loss": 0.85584583, + "memory(GiB)": 71.19, + "step": 5005, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73194895, + "epoch": 3.93, + "learning_rate": 7.11518065530064e-05, + "loss": 0.83459311, + "memory(GiB)": 71.19, + "step": 5010, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.73548174, + "epoch": 3.93, + "learning_rate": 7.109303223101765e-05, + "loss": 0.82189837, + "memory(GiB)": 71.19, + "step": 5015, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.72127552, + "epoch": 3.93, + "learning_rate": 7.103422243202096e-05, + "loss": 0.86382265, + "memory(GiB)": 71.19, + "step": 5020, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.73075724, + "epoch": 3.94, + "learning_rate": 7.09753772549303e-05, + "loss": 0.81052485, + "memory(GiB)": 71.19, + "step": 5025, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.72500548, + "epoch": 3.94, + "learning_rate": 7.091649679871914e-05, + "loss": 0.84569874, + "memory(GiB)": 71.19, + "step": 5030, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.72286496, + "epoch": 3.95, + "learning_rate": 7.085758116242036e-05, + "loss": 0.84847412, + "memory(GiB)": 71.19, + "step": 5035, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.72470217, + "epoch": 3.95, + "learning_rate": 7.079863044512588e-05, + "loss": 0.85448866, + "memory(GiB)": 71.19, + "step": 5040, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.71964459, + "epoch": 3.95, + "learning_rate": 7.07396447459867e-05, + "loss": 0.86957884, + "memory(GiB)": 71.19, + "step": 5045, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.72210011, + "epoch": 3.96, + "learning_rate": 7.06806241642127e-05, + "loss": 0.85360498, + "memory(GiB)": 71.19, + "step": 5050, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.71635909, + "epoch": 3.96, + "learning_rate": 7.062156879907234e-05, + "loss": 0.88525906, + "memory(GiB)": 71.19, + "step": 5055, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.71934958, + "epoch": 3.97, + "learning_rate": 7.05624787498926e-05, + "loss": 0.86156912, + "memory(GiB)": 71.19, + "step": 5060, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.7274035, + "epoch": 3.97, + "learning_rate": 7.050335411605888e-05, + "loss": 0.84397354, + "memory(GiB)": 71.19, + "step": 5065, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.72633142, + "epoch": 3.97, + "learning_rate": 7.044419499701462e-05, + "loss": 0.85779991, + "memory(GiB)": 71.19, + "step": 5070, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.7241158, + "epoch": 3.98, + "learning_rate": 7.038500149226138e-05, + "loss": 0.84771671, + "memory(GiB)": 71.19, + "step": 5075, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.71850133, + "epoch": 3.98, + "learning_rate": 7.032577370135846e-05, + "loss": 0.85709019, + "memory(GiB)": 71.19, + "step": 5080, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.71291666, + "epoch": 3.99, + "learning_rate": 7.026651172392293e-05, + "loss": 0.88760433, + "memory(GiB)": 71.19, + "step": 5085, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.72612734, + "epoch": 3.99, + "learning_rate": 7.020721565962925e-05, + "loss": 0.8476985, + "memory(GiB)": 71.19, + "step": 5090, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.72394257, + "epoch": 3.99, + "learning_rate": 7.014788560820928e-05, + "loss": 0.83070583, + "memory(GiB)": 71.19, + "step": 5095, + "train_speed(iter/s)": 0.024198 + }, + { + "acc": 0.72538056, + "epoch": 4.0, + "learning_rate": 7.0088521669452e-05, + "loss": 0.83622665, + "memory(GiB)": 71.19, + "step": 5100, + "train_speed(iter/s)": 0.024198 + }, + { + "epoch": 4.0, + "eval_acc": 0.7649636864512898, + "eval_loss": 0.6868141293525696, + "eval_runtime": 107.3501, + "eval_samples_per_second": 0.866, + "eval_steps_per_second": 0.866, + "step": 5103 + }, + { + "acc": 0.74643955, + "epoch": 4.0, + "learning_rate": 7.002912394320344e-05, + "loss": 0.78384089, + "memory(GiB)": 71.19, + "step": 5105, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76987772, + "epoch": 4.01, + "learning_rate": 6.996969252936645e-05, + "loss": 0.70066395, + "memory(GiB)": 71.19, + "step": 5110, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76714759, + "epoch": 4.01, + "learning_rate": 6.991022752790045e-05, + "loss": 0.69841785, + "memory(GiB)": 71.19, + "step": 5115, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76072993, + "epoch": 4.01, + "learning_rate": 6.985072903882149e-05, + "loss": 0.7248908, + "memory(GiB)": 71.19, + "step": 5120, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76630473, + "epoch": 4.02, + "learning_rate": 6.979119716220184e-05, + "loss": 0.71821766, + "memory(GiB)": 71.19, + "step": 5125, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.75970073, + "epoch": 4.02, + "learning_rate": 6.973163199816998e-05, + "loss": 0.71612158, + "memory(GiB)": 71.19, + "step": 5130, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.77745566, + "epoch": 4.02, + "learning_rate": 6.967203364691035e-05, + "loss": 0.65998745, + "memory(GiB)": 71.19, + "step": 5135, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.77100053, + "epoch": 4.03, + "learning_rate": 6.961240220866321e-05, + "loss": 0.68110933, + "memory(GiB)": 71.19, + "step": 5140, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76853023, + "epoch": 4.03, + "learning_rate": 6.955273778372448e-05, + "loss": 0.69694996, + "memory(GiB)": 71.19, + "step": 5145, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.77470398, + "epoch": 4.04, + "learning_rate": 6.949304047244557e-05, + "loss": 0.67210259, + "memory(GiB)": 71.19, + "step": 5150, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.78012462, + "epoch": 4.04, + "learning_rate": 6.943331037523318e-05, + "loss": 0.68669949, + "memory(GiB)": 71.19, + "step": 5155, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.7726408, + "epoch": 4.04, + "learning_rate": 6.937354759254915e-05, + "loss": 0.6830338, + "memory(GiB)": 71.19, + "step": 5160, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.78194818, + "epoch": 4.05, + "learning_rate": 6.931375222491035e-05, + "loss": 0.66556716, + "memory(GiB)": 71.19, + "step": 5165, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.7694963, + "epoch": 4.05, + "learning_rate": 6.925392437288837e-05, + "loss": 0.70391464, + "memory(GiB)": 71.19, + "step": 5170, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76015844, + "epoch": 4.06, + "learning_rate": 6.919406413710951e-05, + "loss": 0.70853, + "memory(GiB)": 71.19, + "step": 5175, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.78214989, + "epoch": 4.06, + "learning_rate": 6.91341716182545e-05, + "loss": 0.64797435, + "memory(GiB)": 71.19, + "step": 5180, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76053729, + "epoch": 4.06, + "learning_rate": 6.907424691705836e-05, + "loss": 0.71844401, + "memory(GiB)": 71.19, + "step": 5185, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76250482, + "epoch": 4.07, + "learning_rate": 6.90142901343103e-05, + "loss": 0.69147439, + "memory(GiB)": 71.19, + "step": 5190, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.76390204, + "epoch": 4.07, + "learning_rate": 6.89543013708534e-05, + "loss": 0.69992247, + "memory(GiB)": 71.19, + "step": 5195, + "train_speed(iter/s)": 0.024185 + }, + { + "acc": 0.77699156, + "epoch": 4.08, + "learning_rate": 6.889428072758458e-05, + "loss": 0.65943475, + "memory(GiB)": 71.19, + "step": 5200, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.77530951, + "epoch": 4.08, + "learning_rate": 6.883422830545437e-05, + "loss": 0.68648615, + "memory(GiB)": 71.19, + "step": 5205, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.7692956, + "epoch": 4.08, + "learning_rate": 6.87741442054668e-05, + "loss": 0.70791373, + "memory(GiB)": 71.19, + "step": 5210, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.75982862, + "epoch": 4.09, + "learning_rate": 6.871402852867906e-05, + "loss": 0.72244291, + "memory(GiB)": 71.19, + "step": 5215, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76754866, + "epoch": 4.09, + "learning_rate": 6.865388137620156e-05, + "loss": 0.71881785, + "memory(GiB)": 71.19, + "step": 5220, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76008248, + "epoch": 4.1, + "learning_rate": 6.859370284919762e-05, + "loss": 0.72455072, + "memory(GiB)": 71.19, + "step": 5225, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76211185, + "epoch": 4.1, + "learning_rate": 6.853349304888331e-05, + "loss": 0.71970272, + "memory(GiB)": 71.19, + "step": 5230, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76919765, + "epoch": 4.1, + "learning_rate": 6.847325207652733e-05, + "loss": 0.69645362, + "memory(GiB)": 71.19, + "step": 5235, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.78420725, + "epoch": 4.11, + "learning_rate": 6.841298003345075e-05, + "loss": 0.66271205, + "memory(GiB)": 71.19, + "step": 5240, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.77155137, + "epoch": 4.11, + "learning_rate": 6.835267702102697e-05, + "loss": 0.69231195, + "memory(GiB)": 71.19, + "step": 5245, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.76921053, + "epoch": 4.11, + "learning_rate": 6.829234314068143e-05, + "loss": 0.69602065, + "memory(GiB)": 71.19, + "step": 5250, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.7555491, + "epoch": 4.12, + "learning_rate": 6.823197849389152e-05, + "loss": 0.74989166, + "memory(GiB)": 71.19, + "step": 5255, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.77402582, + "epoch": 4.12, + "learning_rate": 6.817158318218638e-05, + "loss": 0.68813601, + "memory(GiB)": 71.19, + "step": 5260, + "train_speed(iter/s)": 0.024186 + }, + { + "acc": 0.77681746, + "epoch": 4.13, + "learning_rate": 6.811115730714665e-05, + "loss": 0.66130342, + "memory(GiB)": 71.19, + "step": 5265, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76329417, + "epoch": 4.13, + "learning_rate": 6.80507009704045e-05, + "loss": 0.72811098, + "memory(GiB)": 71.19, + "step": 5270, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76749864, + "epoch": 4.13, + "learning_rate": 6.799021427364324e-05, + "loss": 0.69163623, + "memory(GiB)": 71.19, + "step": 5275, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77502966, + "epoch": 4.14, + "learning_rate": 6.792969731859727e-05, + "loss": 0.68422966, + "memory(GiB)": 71.19, + "step": 5280, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76088901, + "epoch": 4.14, + "learning_rate": 6.786915020705189e-05, + "loss": 0.72609415, + "memory(GiB)": 71.19, + "step": 5285, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77280045, + "epoch": 4.15, + "learning_rate": 6.780857304084309e-05, + "loss": 0.67699676, + "memory(GiB)": 71.19, + "step": 5290, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76591234, + "epoch": 4.15, + "learning_rate": 6.774796592185746e-05, + "loss": 0.71467628, + "memory(GiB)": 71.19, + "step": 5295, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76299758, + "epoch": 4.15, + "learning_rate": 6.768732895203196e-05, + "loss": 0.71544986, + "memory(GiB)": 71.19, + "step": 5300, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77150006, + "epoch": 4.16, + "learning_rate": 6.762666223335372e-05, + "loss": 0.69964447, + "memory(GiB)": 71.19, + "step": 5305, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75832853, + "epoch": 4.16, + "learning_rate": 6.756596586785992e-05, + "loss": 0.74018202, + "memory(GiB)": 71.19, + "step": 5310, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77024693, + "epoch": 4.17, + "learning_rate": 6.750523995763762e-05, + "loss": 0.70159526, + "memory(GiB)": 71.19, + "step": 5315, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77265477, + "epoch": 4.17, + "learning_rate": 6.744448460482357e-05, + "loss": 0.65708079, + "memory(GiB)": 71.19, + "step": 5320, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.7672987, + "epoch": 4.17, + "learning_rate": 6.738369991160402e-05, + "loss": 0.70357451, + "memory(GiB)": 71.19, + "step": 5325, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.78470359, + "epoch": 4.18, + "learning_rate": 6.732288598021458e-05, + "loss": 0.66129317, + "memory(GiB)": 71.19, + "step": 5330, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75318675, + "epoch": 4.18, + "learning_rate": 6.726204291294004e-05, + "loss": 0.7573597, + "memory(GiB)": 71.19, + "step": 5335, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76494775, + "epoch": 4.19, + "learning_rate": 6.720117081211419e-05, + "loss": 0.72770386, + "memory(GiB)": 71.19, + "step": 5340, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77200723, + "epoch": 4.19, + "learning_rate": 6.714026978011967e-05, + "loss": 0.68737392, + "memory(GiB)": 71.19, + "step": 5345, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75903749, + "epoch": 4.19, + "learning_rate": 6.707933991938776e-05, + "loss": 0.70887847, + "memory(GiB)": 71.19, + "step": 5350, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76615915, + "epoch": 4.2, + "learning_rate": 6.701838133239822e-05, + "loss": 0.70868678, + "memory(GiB)": 71.19, + "step": 5355, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76639304, + "epoch": 4.2, + "learning_rate": 6.695739412167916e-05, + "loss": 0.71408496, + "memory(GiB)": 71.19, + "step": 5360, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.7674984, + "epoch": 4.2, + "learning_rate": 6.689637838980678e-05, + "loss": 0.69706955, + "memory(GiB)": 71.19, + "step": 5365, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77202821, + "epoch": 4.21, + "learning_rate": 6.683533423940531e-05, + "loss": 0.69185266, + "memory(GiB)": 71.19, + "step": 5370, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77602949, + "epoch": 4.21, + "learning_rate": 6.677426177314675e-05, + "loss": 0.66611705, + "memory(GiB)": 71.19, + "step": 5375, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76406932, + "epoch": 4.22, + "learning_rate": 6.67131610937507e-05, + "loss": 0.70172687, + "memory(GiB)": 71.19, + "step": 5380, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76476321, + "epoch": 4.22, + "learning_rate": 6.665203230398425e-05, + "loss": 0.6902421, + "memory(GiB)": 71.19, + "step": 5385, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75160317, + "epoch": 4.22, + "learning_rate": 6.659087550666177e-05, + "loss": 0.74526944, + "memory(GiB)": 71.19, + "step": 5390, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76145148, + "epoch": 4.23, + "learning_rate": 6.652969080464472e-05, + "loss": 0.73384714, + "memory(GiB)": 71.19, + "step": 5395, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76717629, + "epoch": 4.23, + "learning_rate": 6.646847830084148e-05, + "loss": 0.69845848, + "memory(GiB)": 71.19, + "step": 5400, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.78010674, + "epoch": 4.24, + "learning_rate": 6.640723809820724e-05, + "loss": 0.67031679, + "memory(GiB)": 71.19, + "step": 5405, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76649265, + "epoch": 4.24, + "learning_rate": 6.634597029974373e-05, + "loss": 0.72474589, + "memory(GiB)": 71.19, + "step": 5410, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77016811, + "epoch": 4.24, + "learning_rate": 6.628467500849909e-05, + "loss": 0.69438682, + "memory(GiB)": 71.19, + "step": 5415, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76155553, + "epoch": 4.25, + "learning_rate": 6.622335232756773e-05, + "loss": 0.70451875, + "memory(GiB)": 71.19, + "step": 5420, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75910053, + "epoch": 4.25, + "learning_rate": 6.616200236009016e-05, + "loss": 0.73806357, + "memory(GiB)": 71.19, + "step": 5425, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76222086, + "epoch": 4.26, + "learning_rate": 6.61006252092527e-05, + "loss": 0.7077971, + "memory(GiB)": 71.19, + "step": 5430, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76169763, + "epoch": 4.26, + "learning_rate": 6.603922097828745e-05, + "loss": 0.71544447, + "memory(GiB)": 71.19, + "step": 5435, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77774611, + "epoch": 4.26, + "learning_rate": 6.597778977047205e-05, + "loss": 0.68002701, + "memory(GiB)": 71.19, + "step": 5440, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76104169, + "epoch": 4.27, + "learning_rate": 6.591633168912947e-05, + "loss": 0.72893152, + "memory(GiB)": 71.19, + "step": 5445, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77122054, + "epoch": 4.27, + "learning_rate": 6.585484683762794e-05, + "loss": 0.69275894, + "memory(GiB)": 71.19, + "step": 5450, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77282248, + "epoch": 4.28, + "learning_rate": 6.57933353193807e-05, + "loss": 0.69376159, + "memory(GiB)": 71.19, + "step": 5455, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.7599041, + "epoch": 4.28, + "learning_rate": 6.57317972378458e-05, + "loss": 0.7232058, + "memory(GiB)": 71.19, + "step": 5460, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.77883954, + "epoch": 4.28, + "learning_rate": 6.567023269652602e-05, + "loss": 0.66660323, + "memory(GiB)": 71.19, + "step": 5465, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76177578, + "epoch": 4.29, + "learning_rate": 6.56086417989686e-05, + "loss": 0.71664767, + "memory(GiB)": 71.19, + "step": 5470, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76507282, + "epoch": 4.29, + "learning_rate": 6.554702464876514e-05, + "loss": 0.71534958, + "memory(GiB)": 71.19, + "step": 5475, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.75966554, + "epoch": 4.3, + "learning_rate": 6.54853813495514e-05, + "loss": 0.71393213, + "memory(GiB)": 71.19, + "step": 5480, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76525397, + "epoch": 4.3, + "learning_rate": 6.54237120050071e-05, + "loss": 0.70895991, + "memory(GiB)": 71.19, + "step": 5485, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77907438, + "epoch": 4.3, + "learning_rate": 6.536201671885575e-05, + "loss": 0.69699798, + "memory(GiB)": 71.19, + "step": 5490, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77636795, + "epoch": 4.31, + "learning_rate": 6.530029559486455e-05, + "loss": 0.67650938, + "memory(GiB)": 71.19, + "step": 5495, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.75787897, + "epoch": 4.31, + "learning_rate": 6.523854873684409e-05, + "loss": 0.73933854, + "memory(GiB)": 71.19, + "step": 5500, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75708508, + "epoch": 4.31, + "learning_rate": 6.517677624864831e-05, + "loss": 0.72797713, + "memory(GiB)": 71.19, + "step": 5505, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76517878, + "epoch": 4.32, + "learning_rate": 6.511497823417418e-05, + "loss": 0.71284537, + "memory(GiB)": 71.19, + "step": 5510, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77976227, + "epoch": 4.32, + "learning_rate": 6.50531547973617e-05, + "loss": 0.66220131, + "memory(GiB)": 71.19, + "step": 5515, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.7598443, + "epoch": 4.33, + "learning_rate": 6.499130604219354e-05, + "loss": 0.71064129, + "memory(GiB)": 71.19, + "step": 5520, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76225853, + "epoch": 4.33, + "learning_rate": 6.492943207269498e-05, + "loss": 0.74577045, + "memory(GiB)": 71.19, + "step": 5525, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.78424692, + "epoch": 4.33, + "learning_rate": 6.486753299293375e-05, + "loss": 0.66039305, + "memory(GiB)": 71.19, + "step": 5530, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76662159, + "epoch": 4.34, + "learning_rate": 6.480560890701976e-05, + "loss": 0.71364965, + "memory(GiB)": 71.19, + "step": 5535, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76090045, + "epoch": 4.34, + "learning_rate": 6.474365991910501e-05, + "loss": 0.70872817, + "memory(GiB)": 71.19, + "step": 5540, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.75455337, + "epoch": 4.35, + "learning_rate": 6.468168613338339e-05, + "loss": 0.74482007, + "memory(GiB)": 71.19, + "step": 5545, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.76986208, + "epoch": 4.35, + "learning_rate": 6.461968765409041e-05, + "loss": 0.69489803, + "memory(GiB)": 71.19, + "step": 5550, + "train_speed(iter/s)": 0.024187 + }, + { + "acc": 0.77070737, + "epoch": 4.35, + "learning_rate": 6.455766458550329e-05, + "loss": 0.71167393, + "memory(GiB)": 71.19, + "step": 5555, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76613202, + "epoch": 4.36, + "learning_rate": 6.449561703194042e-05, + "loss": 0.70667567, + "memory(GiB)": 71.19, + "step": 5560, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76189098, + "epoch": 4.36, + "learning_rate": 6.44335450977615e-05, + "loss": 0.71174026, + "memory(GiB)": 71.19, + "step": 5565, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.7586081, + "epoch": 4.37, + "learning_rate": 6.437144888736715e-05, + "loss": 0.7246048, + "memory(GiB)": 71.19, + "step": 5570, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.7720696, + "epoch": 4.37, + "learning_rate": 6.43093285051989e-05, + "loss": 0.69544797, + "memory(GiB)": 71.19, + "step": 5575, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77282968, + "epoch": 4.37, + "learning_rate": 6.424718405573888e-05, + "loss": 0.6709199, + "memory(GiB)": 71.19, + "step": 5580, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76675258, + "epoch": 4.38, + "learning_rate": 6.418501564350972e-05, + "loss": 0.70900588, + "memory(GiB)": 71.19, + "step": 5585, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77312188, + "epoch": 4.38, + "learning_rate": 6.412282337307436e-05, + "loss": 0.66980944, + "memory(GiB)": 71.19, + "step": 5590, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76313424, + "epoch": 4.39, + "learning_rate": 6.406060734903582e-05, + "loss": 0.71873169, + "memory(GiB)": 71.19, + "step": 5595, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76170006, + "epoch": 4.39, + "learning_rate": 6.399836767603715e-05, + "loss": 0.73406935, + "memory(GiB)": 71.19, + "step": 5600, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77519426, + "epoch": 4.39, + "learning_rate": 6.393610445876113e-05, + "loss": 0.70191274, + "memory(GiB)": 71.19, + "step": 5605, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76001482, + "epoch": 4.4, + "learning_rate": 6.387381780193014e-05, + "loss": 0.70856462, + "memory(GiB)": 71.19, + "step": 5610, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76338596, + "epoch": 4.4, + "learning_rate": 6.381150781030597e-05, + "loss": 0.70342155, + "memory(GiB)": 71.19, + "step": 5615, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.77544284, + "epoch": 4.4, + "learning_rate": 6.37491745886897e-05, + "loss": 0.68927674, + "memory(GiB)": 71.19, + "step": 5620, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.7679841, + "epoch": 4.41, + "learning_rate": 6.368681824192147e-05, + "loss": 0.69150701, + "memory(GiB)": 71.19, + "step": 5625, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.7673996, + "epoch": 4.41, + "learning_rate": 6.362443887488026e-05, + "loss": 0.7154283, + "memory(GiB)": 71.19, + "step": 5630, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76109505, + "epoch": 4.42, + "learning_rate": 6.356203659248386e-05, + "loss": 0.72742548, + "memory(GiB)": 71.19, + "step": 5635, + "train_speed(iter/s)": 0.024188 + }, + { + "acc": 0.76917353, + "epoch": 4.42, + "learning_rate": 6.349961149968849e-05, + "loss": 0.71016984, + "memory(GiB)": 71.19, + "step": 5640, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.77111497, + "epoch": 4.42, + "learning_rate": 6.343716370148887e-05, + "loss": 0.70852461, + "memory(GiB)": 71.19, + "step": 5645, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.76087008, + "epoch": 4.43, + "learning_rate": 6.337469330291778e-05, + "loss": 0.71220889, + "memory(GiB)": 71.19, + "step": 5650, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7703217, + "epoch": 4.43, + "learning_rate": 6.331220040904612e-05, + "loss": 0.69431052, + "memory(GiB)": 71.19, + "step": 5655, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.76420479, + "epoch": 4.44, + "learning_rate": 6.324968512498255e-05, + "loss": 0.71943445, + "memory(GiB)": 71.19, + "step": 5660, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.76351399, + "epoch": 4.44, + "learning_rate": 6.318714755587341e-05, + "loss": 0.72373419, + "memory(GiB)": 71.19, + "step": 5665, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.76989708, + "epoch": 4.44, + "learning_rate": 6.312458780690253e-05, + "loss": 0.70568027, + "memory(GiB)": 71.19, + "step": 5670, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.7618772, + "epoch": 4.45, + "learning_rate": 6.306200598329105e-05, + "loss": 0.72609386, + "memory(GiB)": 71.19, + "step": 5675, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.76857285, + "epoch": 4.45, + "learning_rate": 6.299940219029722e-05, + "loss": 0.7043541, + "memory(GiB)": 71.19, + "step": 5680, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.7670013, + "epoch": 4.46, + "learning_rate": 6.293677653321624e-05, + "loss": 0.69272251, + "memory(GiB)": 71.19, + "step": 5685, + "train_speed(iter/s)": 0.024189 + }, + { + "acc": 0.76531138, + "epoch": 4.46, + "learning_rate": 6.287412911738013e-05, + "loss": 0.72677112, + "memory(GiB)": 71.19, + "step": 5690, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76662335, + "epoch": 4.46, + "learning_rate": 6.281146004815743e-05, + "loss": 0.71060648, + "memory(GiB)": 71.19, + "step": 5695, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76985221, + "epoch": 4.47, + "learning_rate": 6.274876943095316e-05, + "loss": 0.69214835, + "memory(GiB)": 71.19, + "step": 5700, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.75513039, + "epoch": 4.47, + "learning_rate": 6.268605737120856e-05, + "loss": 0.73119974, + "memory(GiB)": 71.19, + "step": 5705, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7597374, + "epoch": 4.48, + "learning_rate": 6.262332397440094e-05, + "loss": 0.71646504, + "memory(GiB)": 71.19, + "step": 5710, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76583333, + "epoch": 4.48, + "learning_rate": 6.256056934604348e-05, + "loss": 0.71394172, + "memory(GiB)": 71.19, + "step": 5715, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.77272115, + "epoch": 4.48, + "learning_rate": 6.24977935916851e-05, + "loss": 0.69063115, + "memory(GiB)": 71.19, + "step": 5720, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76995649, + "epoch": 4.49, + "learning_rate": 6.243499681691024e-05, + "loss": 0.695227, + "memory(GiB)": 71.19, + "step": 5725, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.75672503, + "epoch": 4.49, + "learning_rate": 6.237217912733869e-05, + "loss": 0.73567729, + "memory(GiB)": 71.19, + "step": 5730, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76756783, + "epoch": 4.49, + "learning_rate": 6.23093406286254e-05, + "loss": 0.69406104, + "memory(GiB)": 71.19, + "step": 5735, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.78130131, + "epoch": 4.5, + "learning_rate": 6.224648142646037e-05, + "loss": 0.68319197, + "memory(GiB)": 71.19, + "step": 5740, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75013237, + "epoch": 4.5, + "learning_rate": 6.218360162656836e-05, + "loss": 0.73892965, + "memory(GiB)": 71.19, + "step": 5745, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76402731, + "epoch": 4.51, + "learning_rate": 6.212070133470884e-05, + "loss": 0.71493678, + "memory(GiB)": 71.19, + "step": 5750, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76769323, + "epoch": 4.51, + "learning_rate": 6.205778065667566e-05, + "loss": 0.71724548, + "memory(GiB)": 71.19, + "step": 5755, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75835562, + "epoch": 4.51, + "learning_rate": 6.199483969829705e-05, + "loss": 0.7360487, + "memory(GiB)": 71.19, + "step": 5760, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76376677, + "epoch": 4.52, + "learning_rate": 6.19318785654353e-05, + "loss": 0.71627126, + "memory(GiB)": 71.19, + "step": 5765, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76958847, + "epoch": 4.52, + "learning_rate": 6.186889736398664e-05, + "loss": 0.71689034, + "memory(GiB)": 71.19, + "step": 5770, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75499663, + "epoch": 4.53, + "learning_rate": 6.180589619988103e-05, + "loss": 0.74792767, + "memory(GiB)": 71.19, + "step": 5775, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7811924, + "epoch": 4.53, + "learning_rate": 6.174287517908207e-05, + "loss": 0.6795465, + "memory(GiB)": 71.19, + "step": 5780, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76298246, + "epoch": 4.53, + "learning_rate": 6.167983440758672e-05, + "loss": 0.71815233, + "memory(GiB)": 71.19, + "step": 5785, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7640779, + "epoch": 4.54, + "learning_rate": 6.16167739914251e-05, + "loss": 0.71062479, + "memory(GiB)": 71.19, + "step": 5790, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76794705, + "epoch": 4.54, + "learning_rate": 6.15536940366605e-05, + "loss": 0.70243373, + "memory(GiB)": 71.19, + "step": 5795, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76325617, + "epoch": 4.55, + "learning_rate": 6.149059464938893e-05, + "loss": 0.70105352, + "memory(GiB)": 71.19, + "step": 5800, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.77031732, + "epoch": 4.55, + "learning_rate": 6.142747593573922e-05, + "loss": 0.70099783, + "memory(GiB)": 71.19, + "step": 5805, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76253133, + "epoch": 4.55, + "learning_rate": 6.136433800187262e-05, + "loss": 0.72461586, + "memory(GiB)": 71.19, + "step": 5810, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.77606282, + "epoch": 4.56, + "learning_rate": 6.130118095398269e-05, + "loss": 0.67780962, + "memory(GiB)": 71.19, + "step": 5815, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7617002, + "epoch": 4.56, + "learning_rate": 6.123800489829523e-05, + "loss": 0.70110459, + "memory(GiB)": 71.19, + "step": 5820, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76943331, + "epoch": 4.57, + "learning_rate": 6.117480994106793e-05, + "loss": 0.71336999, + "memory(GiB)": 71.19, + "step": 5825, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75526834, + "epoch": 4.57, + "learning_rate": 6.111159618859031e-05, + "loss": 0.74314675, + "memory(GiB)": 71.19, + "step": 5830, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76729212, + "epoch": 4.57, + "learning_rate": 6.104836374718347e-05, + "loss": 0.6993835, + "memory(GiB)": 71.19, + "step": 5835, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.77066221, + "epoch": 4.58, + "learning_rate": 6.0985112723199976e-05, + "loss": 0.68562093, + "memory(GiB)": 71.19, + "step": 5840, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76890574, + "epoch": 4.58, + "learning_rate": 6.0921843223023634e-05, + "loss": 0.70060396, + "memory(GiB)": 71.19, + "step": 5845, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76268287, + "epoch": 4.59, + "learning_rate": 6.085855535306931e-05, + "loss": 0.71980524, + "memory(GiB)": 71.19, + "step": 5850, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.7607635, + "epoch": 4.59, + "learning_rate": 6.0795249219782814e-05, + "loss": 0.7061645, + "memory(GiB)": 71.19, + "step": 5855, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76794925, + "epoch": 4.59, + "learning_rate": 6.0731924929640614e-05, + "loss": 0.70129228, + "memory(GiB)": 71.19, + "step": 5860, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76963058, + "epoch": 4.6, + "learning_rate": 6.066858258914978e-05, + "loss": 0.71304092, + "memory(GiB)": 71.19, + "step": 5865, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76559944, + "epoch": 4.6, + "learning_rate": 6.060522230484769e-05, + "loss": 0.70504079, + "memory(GiB)": 71.19, + "step": 5870, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.77082806, + "epoch": 4.6, + "learning_rate": 6.054184418330191e-05, + "loss": 0.6952466, + "memory(GiB)": 71.19, + "step": 5875, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76205645, + "epoch": 4.61, + "learning_rate": 6.0478448331110015e-05, + "loss": 0.71116371, + "memory(GiB)": 71.19, + "step": 5880, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.77297888, + "epoch": 4.61, + "learning_rate": 6.041503485489942e-05, + "loss": 0.68239374, + "memory(GiB)": 71.19, + "step": 5885, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76356063, + "epoch": 4.62, + "learning_rate": 6.035160386132718e-05, + "loss": 0.70998626, + "memory(GiB)": 71.19, + "step": 5890, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76024551, + "epoch": 4.62, + "learning_rate": 6.0288155457079754e-05, + "loss": 0.72426705, + "memory(GiB)": 71.19, + "step": 5895, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76642303, + "epoch": 4.62, + "learning_rate": 6.022468974887295e-05, + "loss": 0.72213707, + "memory(GiB)": 71.19, + "step": 5900, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.77551522, + "epoch": 4.63, + "learning_rate": 6.016120684345167e-05, + "loss": 0.68373032, + "memory(GiB)": 71.19, + "step": 5905, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76769862, + "epoch": 4.63, + "learning_rate": 6.009770684758973e-05, + "loss": 0.71322379, + "memory(GiB)": 71.19, + "step": 5910, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76745868, + "epoch": 4.64, + "learning_rate": 6.0034189868089677e-05, + "loss": 0.71255426, + "memory(GiB)": 71.19, + "step": 5915, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76841478, + "epoch": 4.64, + "learning_rate": 5.9970656011782646e-05, + "loss": 0.70099401, + "memory(GiB)": 71.19, + "step": 5920, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76973267, + "epoch": 4.64, + "learning_rate": 5.990710538552813e-05, + "loss": 0.6992341, + "memory(GiB)": 71.19, + "step": 5925, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.74826236, + "epoch": 4.65, + "learning_rate": 5.984353809621388e-05, + "loss": 0.74642177, + "memory(GiB)": 71.19, + "step": 5930, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7560431, + "epoch": 4.65, + "learning_rate": 5.977995425075562e-05, + "loss": 0.72930694, + "memory(GiB)": 71.19, + "step": 5935, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76061263, + "epoch": 4.66, + "learning_rate": 5.971635395609694e-05, + "loss": 0.73713555, + "memory(GiB)": 71.19, + "step": 5940, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.75073433, + "epoch": 4.66, + "learning_rate": 5.965273731920908e-05, + "loss": 0.74310646, + "memory(GiB)": 71.19, + "step": 5945, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76606636, + "epoch": 4.66, + "learning_rate": 5.958910444709083e-05, + "loss": 0.7026166, + "memory(GiB)": 71.19, + "step": 5950, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.74374471, + "epoch": 4.67, + "learning_rate": 5.9525455446768194e-05, + "loss": 0.77414222, + "memory(GiB)": 71.19, + "step": 5955, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7663578, + "epoch": 4.67, + "learning_rate": 5.9461790425294375e-05, + "loss": 0.71838512, + "memory(GiB)": 71.19, + "step": 5960, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.76450734, + "epoch": 4.68, + "learning_rate": 5.939810948974948e-05, + "loss": 0.72742691, + "memory(GiB)": 71.19, + "step": 5965, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.7788063, + "epoch": 4.68, + "learning_rate": 5.933441274724041e-05, + "loss": 0.66779647, + "memory(GiB)": 71.19, + "step": 5970, + "train_speed(iter/s)": 0.02419 + }, + { + "acc": 0.77166514, + "epoch": 4.68, + "learning_rate": 5.927070030490062e-05, + "loss": 0.68094988, + "memory(GiB)": 71.19, + "step": 5975, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76896586, + "epoch": 4.69, + "learning_rate": 5.9206972269890014e-05, + "loss": 0.68136163, + "memory(GiB)": 71.19, + "step": 5980, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76952062, + "epoch": 4.69, + "learning_rate": 5.914322874939466e-05, + "loss": 0.7040164, + "memory(GiB)": 71.19, + "step": 5985, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75679116, + "epoch": 4.69, + "learning_rate": 5.907946985062678e-05, + "loss": 0.73868618, + "memory(GiB)": 71.19, + "step": 5990, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76209521, + "epoch": 4.7, + "learning_rate": 5.9015695680824325e-05, + "loss": 0.72485847, + "memory(GiB)": 71.19, + "step": 5995, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76353059, + "epoch": 4.7, + "learning_rate": 5.8951906347251027e-05, + "loss": 0.71688752, + "memory(GiB)": 71.19, + "step": 6000, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75670443, + "epoch": 4.71, + "learning_rate": 5.888810195719609e-05, + "loss": 0.73053293, + "memory(GiB)": 71.19, + "step": 6005, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75863943, + "epoch": 4.71, + "learning_rate": 5.8824282617974045e-05, + "loss": 0.70380878, + "memory(GiB)": 71.19, + "step": 6010, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76119242, + "epoch": 4.71, + "learning_rate": 5.876044843692456e-05, + "loss": 0.7133338, + "memory(GiB)": 71.19, + "step": 6015, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76129284, + "epoch": 4.72, + "learning_rate": 5.869659952141228e-05, + "loss": 0.71880956, + "memory(GiB)": 71.19, + "step": 6020, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76461582, + "epoch": 4.72, + "learning_rate": 5.8632735978826626e-05, + "loss": 0.71644011, + "memory(GiB)": 71.19, + "step": 6025, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76767044, + "epoch": 4.73, + "learning_rate": 5.856885791658158e-05, + "loss": 0.69806213, + "memory(GiB)": 71.19, + "step": 6030, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.75022593, + "epoch": 4.73, + "learning_rate": 5.8504965442115644e-05, + "loss": 0.75385232, + "memory(GiB)": 71.19, + "step": 6035, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.75788932, + "epoch": 4.73, + "learning_rate": 5.844105866289147e-05, + "loss": 0.73194909, + "memory(GiB)": 71.19, + "step": 6040, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.75529494, + "epoch": 4.74, + "learning_rate": 5.83771376863958e-05, + "loss": 0.72789454, + "memory(GiB)": 71.19, + "step": 6045, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.77266655, + "epoch": 4.74, + "learning_rate": 5.831320262013926e-05, + "loss": 0.6826066, + "memory(GiB)": 71.19, + "step": 6050, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.75812349, + "epoch": 4.75, + "learning_rate": 5.824925357165617e-05, + "loss": 0.73104687, + "memory(GiB)": 71.19, + "step": 6055, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.76626301, + "epoch": 4.75, + "learning_rate": 5.818529064850436e-05, + "loss": 0.7082016, + "memory(GiB)": 71.19, + "step": 6060, + "train_speed(iter/s)": 0.024191 + }, + { + "acc": 0.7526711, + "epoch": 4.75, + "learning_rate": 5.8121313958265e-05, + "loss": 0.74757395, + "memory(GiB)": 71.19, + "step": 6065, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76689634, + "epoch": 4.76, + "learning_rate": 5.8057323608542425e-05, + "loss": 0.70414853, + "memory(GiB)": 71.19, + "step": 6070, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.75855989, + "epoch": 4.76, + "learning_rate": 5.799331970696393e-05, + "loss": 0.73491292, + "memory(GiB)": 71.19, + "step": 6075, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.74774814, + "epoch": 4.77, + "learning_rate": 5.792930236117964e-05, + "loss": 0.76034083, + "memory(GiB)": 71.19, + "step": 6080, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.74793382, + "epoch": 4.77, + "learning_rate": 5.786527167886221e-05, + "loss": 0.76183257, + "memory(GiB)": 71.19, + "step": 6085, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76219697, + "epoch": 4.77, + "learning_rate": 5.78012277677068e-05, + "loss": 0.73324904, + "memory(GiB)": 71.19, + "step": 6090, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.76384172, + "epoch": 4.78, + "learning_rate": 5.7737170735430825e-05, + "loss": 0.71691432, + "memory(GiB)": 71.19, + "step": 6095, + "train_speed(iter/s)": 0.024192 + }, + { + "acc": 0.74610209, + "epoch": 4.78, + "learning_rate": 5.7673100689773707e-05, + "loss": 0.75250793, + "memory(GiB)": 71.19, + "step": 6100, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.75925684, + "epoch": 4.78, + "learning_rate": 5.760901773849682e-05, + "loss": 0.71436172, + "memory(GiB)": 71.19, + "step": 6105, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.76955056, + "epoch": 4.79, + "learning_rate": 5.754492198938318e-05, + "loss": 0.69148941, + "memory(GiB)": 71.19, + "step": 6110, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.75973935, + "epoch": 4.79, + "learning_rate": 5.748081355023739e-05, + "loss": 0.7391295, + "memory(GiB)": 71.19, + "step": 6115, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.75714288, + "epoch": 4.8, + "learning_rate": 5.741669252888535e-05, + "loss": 0.71268778, + "memory(GiB)": 71.19, + "step": 6120, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.77134218, + "epoch": 4.8, + "learning_rate": 5.735255903317417e-05, + "loss": 0.69459882, + "memory(GiB)": 71.19, + "step": 6125, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.76627364, + "epoch": 4.8, + "learning_rate": 5.72884131709719e-05, + "loss": 0.69015708, + "memory(GiB)": 71.19, + "step": 6130, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.75454583, + "epoch": 4.81, + "learning_rate": 5.7224255050167394e-05, + "loss": 0.75072398, + "memory(GiB)": 71.19, + "step": 6135, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.77098956, + "epoch": 4.81, + "learning_rate": 5.7160084778670156e-05, + "loss": 0.69697561, + "memory(GiB)": 71.19, + "step": 6140, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.76857743, + "epoch": 4.82, + "learning_rate": 5.709590246441007e-05, + "loss": 0.71755748, + "memory(GiB)": 71.19, + "step": 6145, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.75801721, + "epoch": 4.82, + "learning_rate": 5.703170821533733e-05, + "loss": 0.71778822, + "memory(GiB)": 71.19, + "step": 6150, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.76996355, + "epoch": 4.82, + "learning_rate": 5.696750213942218e-05, + "loss": 0.67777882, + "memory(GiB)": 71.19, + "step": 6155, + "train_speed(iter/s)": 0.024193 + }, + { + "acc": 0.7731328, + "epoch": 4.83, + "learning_rate": 5.690328434465475e-05, + "loss": 0.70121741, + "memory(GiB)": 71.19, + "step": 6160, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.75167289, + "epoch": 4.83, + "learning_rate": 5.68390549390449e-05, + "loss": 0.73404212, + "memory(GiB)": 71.19, + "step": 6165, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76081247, + "epoch": 4.84, + "learning_rate": 5.677481403062199e-05, + "loss": 0.72303152, + "memory(GiB)": 71.19, + "step": 6170, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.77228723, + "epoch": 4.84, + "learning_rate": 5.671056172743479e-05, + "loss": 0.68236284, + "memory(GiB)": 71.19, + "step": 6175, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76532092, + "epoch": 4.84, + "learning_rate": 5.6646298137551125e-05, + "loss": 0.70234327, + "memory(GiB)": 71.19, + "step": 6180, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76404386, + "epoch": 4.85, + "learning_rate": 5.658202336905791e-05, + "loss": 0.71822534, + "memory(GiB)": 71.19, + "step": 6185, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.75237103, + "epoch": 4.85, + "learning_rate": 5.651773753006081e-05, + "loss": 0.74845624, + "memory(GiB)": 71.19, + "step": 6190, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7637733, + "epoch": 4.86, + "learning_rate": 5.64534407286841e-05, + "loss": 0.71927757, + "memory(GiB)": 71.19, + "step": 6195, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.7700985, + "epoch": 4.86, + "learning_rate": 5.638913307307055e-05, + "loss": 0.71858625, + "memory(GiB)": 71.19, + "step": 6200, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76865525, + "epoch": 4.86, + "learning_rate": 5.632481467138111e-05, + "loss": 0.70387897, + "memory(GiB)": 71.19, + "step": 6205, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76575408, + "epoch": 4.87, + "learning_rate": 5.626048563179487e-05, + "loss": 0.6937983, + "memory(GiB)": 71.19, + "step": 6210, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76595235, + "epoch": 4.87, + "learning_rate": 5.619614606250877e-05, + "loss": 0.70782719, + "memory(GiB)": 71.19, + "step": 6215, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.77544599, + "epoch": 4.88, + "learning_rate": 5.613179607173748e-05, + "loss": 0.70953703, + "memory(GiB)": 71.19, + "step": 6220, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76125436, + "epoch": 4.88, + "learning_rate": 5.6067435767713184e-05, + "loss": 0.71401358, + "memory(GiB)": 71.19, + "step": 6225, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.77986512, + "epoch": 4.88, + "learning_rate": 5.600306525868544e-05, + "loss": 0.66382799, + "memory(GiB)": 71.19, + "step": 6230, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.77460809, + "epoch": 4.89, + "learning_rate": 5.593868465292094e-05, + "loss": 0.70628066, + "memory(GiB)": 71.19, + "step": 6235, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.77852488, + "epoch": 4.89, + "learning_rate": 5.5874294058703346e-05, + "loss": 0.68490057, + "memory(GiB)": 71.19, + "step": 6240, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76132755, + "epoch": 4.89, + "learning_rate": 5.58098935843332e-05, + "loss": 0.71025805, + "memory(GiB)": 71.19, + "step": 6245, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76081877, + "epoch": 4.9, + "learning_rate": 5.574548333812753e-05, + "loss": 0.70690475, + "memory(GiB)": 71.19, + "step": 6250, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.76902518, + "epoch": 4.9, + "learning_rate": 5.5681063428419944e-05, + "loss": 0.69533243, + "memory(GiB)": 71.19, + "step": 6255, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.77235093, + "epoch": 4.91, + "learning_rate": 5.5616633963560194e-05, + "loss": 0.69402099, + "memory(GiB)": 71.19, + "step": 6260, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.75550814, + "epoch": 4.91, + "learning_rate": 5.555219505191417e-05, + "loss": 0.7272469, + "memory(GiB)": 71.19, + "step": 6265, + "train_speed(iter/s)": 0.024194 + }, + { + "acc": 0.75596237, + "epoch": 4.91, + "learning_rate": 5.548774680186358e-05, + "loss": 0.74081788, + "memory(GiB)": 71.19, + "step": 6270, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.7628664, + "epoch": 4.92, + "learning_rate": 5.5423289321805914e-05, + "loss": 0.71786399, + "memory(GiB)": 71.19, + "step": 6275, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.76951499, + "epoch": 4.92, + "learning_rate": 5.535882272015417e-05, + "loss": 0.70130796, + "memory(GiB)": 71.19, + "step": 6280, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.75305843, + "epoch": 4.93, + "learning_rate": 5.529434710533664e-05, + "loss": 0.74882407, + "memory(GiB)": 71.19, + "step": 6285, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.76734796, + "epoch": 4.93, + "learning_rate": 5.522986258579681e-05, + "loss": 0.71578069, + "memory(GiB)": 71.19, + "step": 6290, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.76613975, + "epoch": 4.93, + "learning_rate": 5.516536926999316e-05, + "loss": 0.70828838, + "memory(GiB)": 71.19, + "step": 6295, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.77031693, + "epoch": 4.94, + "learning_rate": 5.510086726639894e-05, + "loss": 0.70443668, + "memory(GiB)": 71.19, + "step": 6300, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.75785489, + "epoch": 4.94, + "learning_rate": 5.5036356683502e-05, + "loss": 0.7272397, + "memory(GiB)": 71.19, + "step": 6305, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.77772474, + "epoch": 4.95, + "learning_rate": 5.497183762980467e-05, + "loss": 0.67617226, + "memory(GiB)": 71.19, + "step": 6310, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.76613221, + "epoch": 4.95, + "learning_rate": 5.490731021382346e-05, + "loss": 0.71000295, + "memory(GiB)": 71.19, + "step": 6315, + "train_speed(iter/s)": 0.024195 + }, + { + "acc": 0.74996147, + "epoch": 4.95, + "learning_rate": 5.4842774544089e-05, + "loss": 0.75001779, + "memory(GiB)": 71.19, + "step": 6320, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.76159739, + "epoch": 4.96, + "learning_rate": 5.477823072914579e-05, + "loss": 0.71828237, + "memory(GiB)": 71.19, + "step": 6325, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.74771967, + "epoch": 4.96, + "learning_rate": 5.4713678877552e-05, + "loss": 0.77577434, + "memory(GiB)": 71.19, + "step": 6330, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.76517348, + "epoch": 4.97, + "learning_rate": 5.464911909787934e-05, + "loss": 0.72100554, + "memory(GiB)": 71.19, + "step": 6335, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.76637516, + "epoch": 4.97, + "learning_rate": 5.4584551498712865e-05, + "loss": 0.70268941, + "memory(GiB)": 71.19, + "step": 6340, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.76800013, + "epoch": 4.97, + "learning_rate": 5.451997618865078e-05, + "loss": 0.72028384, + "memory(GiB)": 71.19, + "step": 6345, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.76661825, + "epoch": 4.98, + "learning_rate": 5.445539327630422e-05, + "loss": 0.69725647, + "memory(GiB)": 71.19, + "step": 6350, + "train_speed(iter/s)": 0.024196 + }, + { + "acc": 0.75148797, + "epoch": 4.98, + "learning_rate": 5.439080287029716e-05, + "loss": 0.73803744, + "memory(GiB)": 71.19, + "step": 6355, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.75281067, + "epoch": 4.98, + "learning_rate": 5.432620507926616e-05, + "loss": 0.75406656, + "memory(GiB)": 71.19, + "step": 6360, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.76611757, + "epoch": 4.99, + "learning_rate": 5.426160001186016e-05, + "loss": 0.70822902, + "memory(GiB)": 71.19, + "step": 6365, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.77433181, + "epoch": 4.99, + "learning_rate": 5.41969877767404e-05, + "loss": 0.68499122, + "memory(GiB)": 71.19, + "step": 6370, + "train_speed(iter/s)": 0.024197 + }, + { + "acc": 0.7715888, + "epoch": 5.0, + "learning_rate": 5.413236848258015e-05, + "loss": 0.70902772, + "memory(GiB)": 71.19, + "step": 6375, + "train_speed(iter/s)": 0.024197 + }, + { + "epoch": 5.0, + "eval_acc": 0.7930127723516154, + "eval_loss": 0.5855495929718018, + "eval_runtime": 107.3505, + "eval_samples_per_second": 0.866, + "eval_steps_per_second": 0.866, + "step": 6379 + } + ], + "logging_steps": 5, + "max_steps": 12750, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 1, + "total_flos": 2.781799283988169e+22, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}