{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 20, "global_step": 2115, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.7339246119733924, "eval_f1": 0.24528301886792453, "eval_loss": 0.6025775074958801, "eval_precision": 0.6, "eval_recall": 0.1541501976284585, "eval_runtime": 48.7601, "eval_samples_per_second": 5.66, "eval_steps_per_second": 0.185, "step": 0 }, { "epoch": 0.00047281323877068556, "grad_norm": 2.2663159370422363, "learning_rate": 9.433962264150944e-08, "loss": 0.6749, "step": 1 }, { "epoch": 0.0009456264775413711, "grad_norm": 2.2706336975097656, "learning_rate": 1.886792452830189e-07, "loss": 0.6176, "step": 2 }, { "epoch": 0.0014184397163120568, "grad_norm": 3.000195026397705, "learning_rate": 2.8301886792452833e-07, "loss": 0.5503, "step": 3 }, { "epoch": 0.0018912529550827422, "grad_norm": 1.9788570404052734, "learning_rate": 3.773584905660378e-07, "loss": 0.5419, "step": 4 }, { "epoch": 0.002364066193853428, "grad_norm": 2.908334255218506, "learning_rate": 4.716981132075472e-07, "loss": 0.6287, "step": 5 }, { "epoch": 0.0028368794326241137, "grad_norm": 2.66474986076355, "learning_rate": 5.660377358490567e-07, "loss": 0.6961, "step": 6 }, { "epoch": 0.003309692671394799, "grad_norm": 2.1652109622955322, "learning_rate": 6.603773584905661e-07, "loss": 0.6128, "step": 7 }, { "epoch": 0.0037825059101654845, "grad_norm": 2.8184926509857178, "learning_rate": 7.547169811320755e-07, "loss": 0.6289, "step": 8 }, { "epoch": 0.00425531914893617, "grad_norm": 2.4191572666168213, "learning_rate": 8.490566037735849e-07, "loss": 0.6109, "step": 9 }, { "epoch": 0.004728132387706856, "grad_norm": 2.1350443363189697, "learning_rate": 9.433962264150944e-07, "loss": 0.6027, "step": 10 }, { "epoch": 0.005200945626477541, "grad_norm": 2.3789725303649902, "learning_rate": 1.037735849056604e-06, "loss": 0.6009, "step": 11 }, { "epoch": 0.005673758865248227, "grad_norm": 2.0987956523895264, "learning_rate": 1.1320754716981133e-06, "loss": 0.5696, "step": 12 }, { "epoch": 0.006146572104018913, "grad_norm": 2.1385180950164795, "learning_rate": 1.2264150943396227e-06, "loss": 0.6081, "step": 13 }, { "epoch": 0.006619385342789598, "grad_norm": 2.550551652908325, "learning_rate": 1.3207547169811322e-06, "loss": 0.6091, "step": 14 }, { "epoch": 0.0070921985815602835, "grad_norm": 2.2968673706054688, "learning_rate": 1.4150943396226415e-06, "loss": 0.6279, "step": 15 }, { "epoch": 0.007565011820330969, "grad_norm": 2.0703189373016357, "learning_rate": 1.509433962264151e-06, "loss": 0.5747, "step": 16 }, { "epoch": 0.008037825059101654, "grad_norm": 3.0899605751037598, "learning_rate": 1.6037735849056604e-06, "loss": 0.6645, "step": 17 }, { "epoch": 0.00851063829787234, "grad_norm": 1.9014838933944702, "learning_rate": 1.6981132075471698e-06, "loss": 0.5237, "step": 18 }, { "epoch": 0.008983451536643027, "grad_norm": 2.2065110206604004, "learning_rate": 1.7924528301886793e-06, "loss": 0.5515, "step": 19 }, { "epoch": 0.009456264775413711, "grad_norm": 1.8808679580688477, "learning_rate": 1.8867924528301889e-06, "loss": 0.5256, "step": 20 }, { "epoch": 0.009456264775413711, "eval_accuracy": 0.7350332594235033, "eval_f1": 0.2507836990595611, "eval_loss": 0.6015310287475586, "eval_precision": 0.6060606060606061, "eval_recall": 0.15810276679841898, "eval_runtime": 49.4054, "eval_samples_per_second": 5.586, "eval_steps_per_second": 0.182, "step": 20 }, { "epoch": 0.009929078014184398, "grad_norm": 1.7238119840621948, "learning_rate": 1.981132075471698e-06, "loss": 0.5206, "step": 21 }, { "epoch": 0.010401891252955082, "grad_norm": 2.4662513732910156, "learning_rate": 2.075471698113208e-06, "loss": 0.6127, "step": 22 }, { "epoch": 0.010874704491725768, "grad_norm": 2.3498942852020264, "learning_rate": 2.1698113207547173e-06, "loss": 0.6588, "step": 23 }, { "epoch": 0.011347517730496455, "grad_norm": 2.9931042194366455, "learning_rate": 2.2641509433962266e-06, "loss": 0.5891, "step": 24 }, { "epoch": 0.01182033096926714, "grad_norm": 2.32051944732666, "learning_rate": 2.358490566037736e-06, "loss": 0.599, "step": 25 }, { "epoch": 0.012293144208037825, "grad_norm": 3.134979486465454, "learning_rate": 2.4528301886792453e-06, "loss": 0.6473, "step": 26 }, { "epoch": 0.01276595744680851, "grad_norm": 2.371523857116699, "learning_rate": 2.547169811320755e-06, "loss": 0.6037, "step": 27 }, { "epoch": 0.013238770685579196, "grad_norm": 2.681360960006714, "learning_rate": 2.6415094339622644e-06, "loss": 0.5573, "step": 28 }, { "epoch": 0.013711583924349883, "grad_norm": 3.202848434448242, "learning_rate": 2.7358490566037738e-06, "loss": 0.6697, "step": 29 }, { "epoch": 0.014184397163120567, "grad_norm": 2.109222888946533, "learning_rate": 2.830188679245283e-06, "loss": 0.6073, "step": 30 }, { "epoch": 0.014657210401891253, "grad_norm": 2.031996250152588, "learning_rate": 2.9245283018867924e-06, "loss": 0.5569, "step": 31 }, { "epoch": 0.015130023640661938, "grad_norm": 2.5829803943634033, "learning_rate": 3.018867924528302e-06, "loss": 0.5503, "step": 32 }, { "epoch": 0.015602836879432624, "grad_norm": 3.7826120853424072, "learning_rate": 3.1132075471698115e-06, "loss": 0.4966, "step": 33 }, { "epoch": 0.01607565011820331, "grad_norm": 1.3336539268493652, "learning_rate": 3.207547169811321e-06, "loss": 0.4965, "step": 34 }, { "epoch": 0.016548463356973995, "grad_norm": 2.4206223487854004, "learning_rate": 3.30188679245283e-06, "loss": 0.6583, "step": 35 }, { "epoch": 0.01702127659574468, "grad_norm": 2.437887191772461, "learning_rate": 3.3962264150943395e-06, "loss": 0.5355, "step": 36 }, { "epoch": 0.017494089834515367, "grad_norm": 2.00358510017395, "learning_rate": 3.4905660377358493e-06, "loss": 0.5854, "step": 37 }, { "epoch": 0.017966903073286054, "grad_norm": 2.635550022125244, "learning_rate": 3.5849056603773586e-06, "loss": 0.6226, "step": 38 }, { "epoch": 0.018439716312056736, "grad_norm": 1.9979338645935059, "learning_rate": 3.679245283018868e-06, "loss": 0.6202, "step": 39 }, { "epoch": 0.018912529550827423, "grad_norm": 2.933034658432007, "learning_rate": 3.7735849056603777e-06, "loss": 0.6118, "step": 40 }, { "epoch": 0.018912529550827423, "eval_accuracy": 0.7361419068736141, "eval_f1": 0.2608695652173913, "eval_loss": 0.5987924337387085, "eval_precision": 0.6086956521739131, "eval_recall": 0.16600790513833993, "eval_runtime": 49.5092, "eval_samples_per_second": 5.575, "eval_steps_per_second": 0.182, "step": 40 }, { "epoch": 0.01938534278959811, "grad_norm": 2.4579005241394043, "learning_rate": 3.8679245283018875e-06, "loss": 0.6329, "step": 41 }, { "epoch": 0.019858156028368795, "grad_norm": 2.0814871788024902, "learning_rate": 3.962264150943396e-06, "loss": 0.5792, "step": 42 }, { "epoch": 0.02033096926713948, "grad_norm": 4.046266078948975, "learning_rate": 4.056603773584906e-06, "loss": 0.6294, "step": 43 }, { "epoch": 0.020803782505910164, "grad_norm": 1.990343451499939, "learning_rate": 4.150943396226416e-06, "loss": 0.5279, "step": 44 }, { "epoch": 0.02127659574468085, "grad_norm": 2.2721312046051025, "learning_rate": 4.245283018867925e-06, "loss": 0.5947, "step": 45 }, { "epoch": 0.021749408983451537, "grad_norm": 2.3753161430358887, "learning_rate": 4.339622641509435e-06, "loss": 0.6097, "step": 46 }, { "epoch": 0.022222222222222223, "grad_norm": 2.2465322017669678, "learning_rate": 4.4339622641509435e-06, "loss": 0.5921, "step": 47 }, { "epoch": 0.02269503546099291, "grad_norm": 1.9690579175949097, "learning_rate": 4.528301886792453e-06, "loss": 0.5191, "step": 48 }, { "epoch": 0.023167848699763592, "grad_norm": 2.9993767738342285, "learning_rate": 4.622641509433963e-06, "loss": 0.5827, "step": 49 }, { "epoch": 0.02364066193853428, "grad_norm": 2.3443307876586914, "learning_rate": 4.716981132075472e-06, "loss": 0.5746, "step": 50 }, { "epoch": 0.024113475177304965, "grad_norm": 2.446950912475586, "learning_rate": 4.811320754716982e-06, "loss": 0.582, "step": 51 }, { "epoch": 0.02458628841607565, "grad_norm": 3.164130687713623, "learning_rate": 4.905660377358491e-06, "loss": 0.6406, "step": 52 }, { "epoch": 0.025059101654846337, "grad_norm": 2.339772939682007, "learning_rate": 5e-06, "loss": 0.5627, "step": 53 }, { "epoch": 0.02553191489361702, "grad_norm": 2.548523187637329, "learning_rate": 5.09433962264151e-06, "loss": 0.5909, "step": 54 }, { "epoch": 0.026004728132387706, "grad_norm": 3.006196975708008, "learning_rate": 5.188679245283019e-06, "loss": 0.6046, "step": 55 }, { "epoch": 0.026477541371158392, "grad_norm": 2.3136887550354004, "learning_rate": 5.283018867924529e-06, "loss": 0.5235, "step": 56 }, { "epoch": 0.02695035460992908, "grad_norm": 2.072728157043457, "learning_rate": 5.377358490566038e-06, "loss": 0.6377, "step": 57 }, { "epoch": 0.027423167848699765, "grad_norm": 3.415151357650757, "learning_rate": 5.4716981132075475e-06, "loss": 0.6367, "step": 58 }, { "epoch": 0.027895981087470448, "grad_norm": 2.400956869125366, "learning_rate": 5.566037735849057e-06, "loss": 0.5671, "step": 59 }, { "epoch": 0.028368794326241134, "grad_norm": 2.0561230182647705, "learning_rate": 5.660377358490566e-06, "loss": 0.5575, "step": 60 }, { "epoch": 0.028368794326241134, "eval_accuracy": 0.7450110864745011, "eval_f1": 0.3072289156626506, "eval_loss": 0.5848703384399414, "eval_precision": 0.6455696202531646, "eval_recall": 0.2015810276679842, "eval_runtime": 49.0008, "eval_samples_per_second": 5.633, "eval_steps_per_second": 0.184, "step": 60 }, { "epoch": 0.02884160756501182, "grad_norm": 2.189640522003174, "learning_rate": 5.754716981132076e-06, "loss": 0.619, "step": 61 }, { "epoch": 0.029314420803782507, "grad_norm": 2.607837677001953, "learning_rate": 5.849056603773585e-06, "loss": 0.6095, "step": 62 }, { "epoch": 0.029787234042553193, "grad_norm": 2.26078200340271, "learning_rate": 5.943396226415095e-06, "loss": 0.5471, "step": 63 }, { "epoch": 0.030260047281323876, "grad_norm": 2.622464656829834, "learning_rate": 6.037735849056604e-06, "loss": 0.6509, "step": 64 }, { "epoch": 0.030732860520094562, "grad_norm": 2.8349571228027344, "learning_rate": 6.132075471698113e-06, "loss": 0.6926, "step": 65 }, { "epoch": 0.031205673758865248, "grad_norm": 2.139317750930786, "learning_rate": 6.226415094339623e-06, "loss": 0.5786, "step": 66 }, { "epoch": 0.03167848699763593, "grad_norm": 3.0620882511138916, "learning_rate": 6.320754716981132e-06, "loss": 0.5841, "step": 67 }, { "epoch": 0.03215130023640662, "grad_norm": 2.194460391998291, "learning_rate": 6.415094339622642e-06, "loss": 0.5746, "step": 68 }, { "epoch": 0.032624113475177303, "grad_norm": 2.3444063663482666, "learning_rate": 6.5094339622641515e-06, "loss": 0.51, "step": 69 }, { "epoch": 0.03309692671394799, "grad_norm": 3.622739791870117, "learning_rate": 6.60377358490566e-06, "loss": 0.6342, "step": 70 }, { "epoch": 0.033569739952718676, "grad_norm": 2.9004671573638916, "learning_rate": 6.69811320754717e-06, "loss": 0.641, "step": 71 }, { "epoch": 0.03404255319148936, "grad_norm": 2.351501941680908, "learning_rate": 6.792452830188679e-06, "loss": 0.5936, "step": 72 }, { "epoch": 0.03451536643026005, "grad_norm": 2.6966824531555176, "learning_rate": 6.886792452830189e-06, "loss": 0.5755, "step": 73 }, { "epoch": 0.034988179669030735, "grad_norm": 2.026407241821289, "learning_rate": 6.981132075471699e-06, "loss": 0.4305, "step": 74 }, { "epoch": 0.03546099290780142, "grad_norm": 2.9599199295043945, "learning_rate": 7.0754716981132075e-06, "loss": 0.494, "step": 75 }, { "epoch": 0.03593380614657211, "grad_norm": 2.460238218307495, "learning_rate": 7.169811320754717e-06, "loss": 0.5991, "step": 76 }, { "epoch": 0.03640661938534279, "grad_norm": 3.174283266067505, "learning_rate": 7.264150943396226e-06, "loss": 0.6035, "step": 77 }, { "epoch": 0.03687943262411347, "grad_norm": 2.4575035572052, "learning_rate": 7.358490566037736e-06, "loss": 0.549, "step": 78 }, { "epoch": 0.03735224586288416, "grad_norm": 2.558811664581299, "learning_rate": 7.452830188679246e-06, "loss": 0.4979, "step": 79 }, { "epoch": 0.037825059101654845, "grad_norm": 2.396045684814453, "learning_rate": 7.5471698113207555e-06, "loss": 0.6385, "step": 80 }, { "epoch": 0.037825059101654845, "eval_accuracy": 0.746119733924612, "eval_f1": 0.3989501312335958, "eval_loss": 0.5647635459899902, "eval_precision": 0.59375, "eval_recall": 0.30039525691699603, "eval_runtime": 49.2336, "eval_samples_per_second": 5.606, "eval_steps_per_second": 0.183, "step": 80 }, { "epoch": 0.03829787234042553, "grad_norm": 3.1989941596984863, "learning_rate": 7.641509433962266e-06, "loss": 0.6269, "step": 81 }, { "epoch": 0.03877068557919622, "grad_norm": 2.829859972000122, "learning_rate": 7.735849056603775e-06, "loss": 0.5911, "step": 82 }, { "epoch": 0.039243498817966904, "grad_norm": 2.1976866722106934, "learning_rate": 7.830188679245284e-06, "loss": 0.5403, "step": 83 }, { "epoch": 0.03971631205673759, "grad_norm": 2.3576695919036865, "learning_rate": 7.924528301886793e-06, "loss": 0.5199, "step": 84 }, { "epoch": 0.04018912529550828, "grad_norm": 2.662384033203125, "learning_rate": 8.018867924528303e-06, "loss": 0.5898, "step": 85 }, { "epoch": 0.04066193853427896, "grad_norm": 4.871118545532227, "learning_rate": 8.113207547169812e-06, "loss": 0.5394, "step": 86 }, { "epoch": 0.04113475177304964, "grad_norm": 2.710362195968628, "learning_rate": 8.207547169811321e-06, "loss": 0.509, "step": 87 }, { "epoch": 0.04160756501182033, "grad_norm": 2.387660264968872, "learning_rate": 8.301886792452832e-06, "loss": 0.567, "step": 88 }, { "epoch": 0.042080378250591015, "grad_norm": 2.4443883895874023, "learning_rate": 8.39622641509434e-06, "loss": 0.5319, "step": 89 }, { "epoch": 0.0425531914893617, "grad_norm": 2.9741415977478027, "learning_rate": 8.49056603773585e-06, "loss": 0.6318, "step": 90 }, { "epoch": 0.04302600472813239, "grad_norm": 2.7385494709014893, "learning_rate": 8.58490566037736e-06, "loss": 0.6109, "step": 91 }, { "epoch": 0.043498817966903074, "grad_norm": 3.7744197845458984, "learning_rate": 8.67924528301887e-06, "loss": 0.6275, "step": 92 }, { "epoch": 0.04397163120567376, "grad_norm": 3.1745519638061523, "learning_rate": 8.773584905660378e-06, "loss": 0.5503, "step": 93 }, { "epoch": 0.044444444444444446, "grad_norm": 3.254016399383545, "learning_rate": 8.867924528301887e-06, "loss": 0.6103, "step": 94 }, { "epoch": 0.04491725768321513, "grad_norm": 2.4502315521240234, "learning_rate": 8.962264150943398e-06, "loss": 0.4538, "step": 95 }, { "epoch": 0.04539007092198582, "grad_norm": 2.24422025680542, "learning_rate": 9.056603773584907e-06, "loss": 0.5061, "step": 96 }, { "epoch": 0.0458628841607565, "grad_norm": 3.284022092819214, "learning_rate": 9.150943396226416e-06, "loss": 0.579, "step": 97 }, { "epoch": 0.046335697399527184, "grad_norm": 2.722243309020996, "learning_rate": 9.245283018867926e-06, "loss": 0.5395, "step": 98 }, { "epoch": 0.04680851063829787, "grad_norm": 2.641986131668091, "learning_rate": 9.339622641509435e-06, "loss": 0.5201, "step": 99 }, { "epoch": 0.04728132387706856, "grad_norm": 2.5733094215393066, "learning_rate": 9.433962264150944e-06, "loss": 0.4791, "step": 100 }, { "epoch": 0.04728132387706856, "eval_accuracy": 0.7660753880266076, "eval_f1": 0.4403183023872679, "eval_loss": 0.539648711681366, "eval_precision": 0.6693548387096774, "eval_recall": 0.32806324110671936, "eval_runtime": 50.2993, "eval_samples_per_second": 5.487, "eval_steps_per_second": 0.179, "step": 100 }, { "epoch": 0.04775413711583924, "grad_norm": 2.3666300773620605, "learning_rate": 9.528301886792455e-06, "loss": 0.4835, "step": 101 }, { "epoch": 0.04822695035460993, "grad_norm": 3.929025888442993, "learning_rate": 9.622641509433963e-06, "loss": 0.48, "step": 102 }, { "epoch": 0.048699763593380616, "grad_norm": 2.604964017868042, "learning_rate": 9.716981132075472e-06, "loss": 0.4988, "step": 103 }, { "epoch": 0.0491725768321513, "grad_norm": 2.985452890396118, "learning_rate": 9.811320754716981e-06, "loss": 0.4611, "step": 104 }, { "epoch": 0.04964539007092199, "grad_norm": 3.0728108882904053, "learning_rate": 9.905660377358492e-06, "loss": 0.4563, "step": 105 }, { "epoch": 0.050118203309692674, "grad_norm": 2.5450596809387207, "learning_rate": 1e-05, "loss": 0.399, "step": 106 }, { "epoch": 0.050591016548463354, "grad_norm": 4.241573810577393, "learning_rate": 1.0094339622641511e-05, "loss": 0.659, "step": 107 }, { "epoch": 0.05106382978723404, "grad_norm": 2.582282781600952, "learning_rate": 1.018867924528302e-05, "loss": 0.4278, "step": 108 }, { "epoch": 0.051536643026004726, "grad_norm": 3.337094306945801, "learning_rate": 1.0283018867924531e-05, "loss": 0.4334, "step": 109 }, { "epoch": 0.05200945626477541, "grad_norm": 2.199113368988037, "learning_rate": 1.0377358490566038e-05, "loss": 0.3644, "step": 110 }, { "epoch": 0.0524822695035461, "grad_norm": 3.2351200580596924, "learning_rate": 1.0471698113207549e-05, "loss": 0.506, "step": 111 }, { "epoch": 0.052955082742316785, "grad_norm": 3.9023163318634033, "learning_rate": 1.0566037735849058e-05, "loss": 0.5263, "step": 112 }, { "epoch": 0.05342789598108747, "grad_norm": 2.8746888637542725, "learning_rate": 1.0660377358490568e-05, "loss": 0.4335, "step": 113 }, { "epoch": 0.05390070921985816, "grad_norm": 3.934784173965454, "learning_rate": 1.0754716981132076e-05, "loss": 0.4804, "step": 114 }, { "epoch": 0.054373522458628844, "grad_norm": 3.4959094524383545, "learning_rate": 1.0849056603773586e-05, "loss": 0.4114, "step": 115 }, { "epoch": 0.05484633569739953, "grad_norm": 3.6590819358825684, "learning_rate": 1.0943396226415095e-05, "loss": 0.4618, "step": 116 }, { "epoch": 0.05531914893617021, "grad_norm": 3.4006142616271973, "learning_rate": 1.1037735849056606e-05, "loss": 0.4839, "step": 117 }, { "epoch": 0.055791962174940896, "grad_norm": 3.9192683696746826, "learning_rate": 1.1132075471698115e-05, "loss": 0.484, "step": 118 }, { "epoch": 0.05626477541371158, "grad_norm": 3.4559454917907715, "learning_rate": 1.1226415094339625e-05, "loss": 0.5247, "step": 119 }, { "epoch": 0.05673758865248227, "grad_norm": 4.34246301651001, "learning_rate": 1.1320754716981132e-05, "loss": 0.3593, "step": 120 }, { "epoch": 0.05673758865248227, "eval_accuracy": 0.779379157427938, "eval_f1": 0.4776902887139108, "eval_loss": 0.5030393600463867, "eval_precision": 0.7109375, "eval_recall": 0.35968379446640314, "eval_runtime": 47.339, "eval_samples_per_second": 5.83, "eval_steps_per_second": 0.19, "step": 120 }, { "epoch": 0.057210401891252954, "grad_norm": 4.863562107086182, "learning_rate": 1.1415094339622643e-05, "loss": 0.4712, "step": 121 }, { "epoch": 0.05768321513002364, "grad_norm": 3.856417179107666, "learning_rate": 1.1509433962264152e-05, "loss": 0.4069, "step": 122 }, { "epoch": 0.05815602836879433, "grad_norm": 3.3835558891296387, "learning_rate": 1.1603773584905663e-05, "loss": 0.4641, "step": 123 }, { "epoch": 0.05862884160756501, "grad_norm": 4.175307750701904, "learning_rate": 1.169811320754717e-05, "loss": 0.4452, "step": 124 }, { "epoch": 0.0591016548463357, "grad_norm": 5.4297356605529785, "learning_rate": 1.179245283018868e-05, "loss": 0.3464, "step": 125 }, { "epoch": 0.059574468085106386, "grad_norm": 3.767544746398926, "learning_rate": 1.188679245283019e-05, "loss": 0.4649, "step": 126 }, { "epoch": 0.06004728132387707, "grad_norm": 3.859020233154297, "learning_rate": 1.19811320754717e-05, "loss": 0.4034, "step": 127 }, { "epoch": 0.06052009456264775, "grad_norm": 5.159704685211182, "learning_rate": 1.2075471698113209e-05, "loss": 0.5041, "step": 128 }, { "epoch": 0.06099290780141844, "grad_norm": 4.291565418243408, "learning_rate": 1.216981132075472e-05, "loss": 0.4173, "step": 129 }, { "epoch": 0.061465721040189124, "grad_norm": 4.175761699676514, "learning_rate": 1.2264150943396227e-05, "loss": 0.391, "step": 130 }, { "epoch": 0.06193853427895981, "grad_norm": 5.98757266998291, "learning_rate": 1.2358490566037737e-05, "loss": 0.4565, "step": 131 }, { "epoch": 0.062411347517730496, "grad_norm": 6.7860307693481445, "learning_rate": 1.2452830188679246e-05, "loss": 0.4116, "step": 132 }, { "epoch": 0.06288416075650118, "grad_norm": 7.493508338928223, "learning_rate": 1.2547169811320757e-05, "loss": 0.4762, "step": 133 }, { "epoch": 0.06335697399527186, "grad_norm": 4.3719964027404785, "learning_rate": 1.2641509433962264e-05, "loss": 0.4301, "step": 134 }, { "epoch": 0.06382978723404255, "grad_norm": 5.992913246154785, "learning_rate": 1.2735849056603775e-05, "loss": 0.4892, "step": 135 }, { "epoch": 0.06430260047281323, "grad_norm": 6.05405330657959, "learning_rate": 1.2830188679245283e-05, "loss": 0.4004, "step": 136 }, { "epoch": 0.06477541371158392, "grad_norm": 6.542272090911865, "learning_rate": 1.2924528301886794e-05, "loss": 0.5386, "step": 137 }, { "epoch": 0.06524822695035461, "grad_norm": 5.304028511047363, "learning_rate": 1.3018867924528303e-05, "loss": 0.3309, "step": 138 }, { "epoch": 0.0657210401891253, "grad_norm": 4.330917835235596, "learning_rate": 1.3113207547169814e-05, "loss": 0.3509, "step": 139 }, { "epoch": 0.06619385342789598, "grad_norm": 6.812550067901611, "learning_rate": 1.320754716981132e-05, "loss": 0.4435, "step": 140 }, { "epoch": 0.06619385342789598, "eval_accuracy": 0.779379157427938, "eval_f1": 0.5446224256292906, "eval_loss": 0.4715713858604431, "eval_precision": 0.6467391304347826, "eval_recall": 0.47035573122529645, "eval_runtime": 48.4446, "eval_samples_per_second": 5.697, "eval_steps_per_second": 0.186, "step": 140 }, { "epoch": 0.06666666666666667, "grad_norm": 7.422946453094482, "learning_rate": 1.3301886792452831e-05, "loss": 0.406, "step": 141 }, { "epoch": 0.06713947990543735, "grad_norm": 7.327658653259277, "learning_rate": 1.339622641509434e-05, "loss": 0.4366, "step": 142 }, { "epoch": 0.06761229314420804, "grad_norm": 6.69068717956543, "learning_rate": 1.3490566037735851e-05, "loss": 0.4263, "step": 143 }, { "epoch": 0.06808510638297872, "grad_norm": 4.780946254730225, "learning_rate": 1.3584905660377358e-05, "loss": 0.325, "step": 144 }, { "epoch": 0.06855791962174941, "grad_norm": 6.016948699951172, "learning_rate": 1.3679245283018869e-05, "loss": 0.4426, "step": 145 }, { "epoch": 0.0690307328605201, "grad_norm": 5.669694900512695, "learning_rate": 1.3773584905660378e-05, "loss": 0.3957, "step": 146 }, { "epoch": 0.06950354609929078, "grad_norm": 6.2454609870910645, "learning_rate": 1.3867924528301888e-05, "loss": 0.3033, "step": 147 }, { "epoch": 0.06997635933806147, "grad_norm": 5.8120808601379395, "learning_rate": 1.3962264150943397e-05, "loss": 0.4472, "step": 148 }, { "epoch": 0.07044917257683216, "grad_norm": 6.466278553009033, "learning_rate": 1.4056603773584908e-05, "loss": 0.3469, "step": 149 }, { "epoch": 0.07092198581560284, "grad_norm": 8.212775230407715, "learning_rate": 1.4150943396226415e-05, "loss": 0.4478, "step": 150 }, { "epoch": 0.07139479905437353, "grad_norm": 7.582151889801025, "learning_rate": 1.4245283018867926e-05, "loss": 0.4312, "step": 151 }, { "epoch": 0.07186761229314421, "grad_norm": 5.214906215667725, "learning_rate": 1.4339622641509435e-05, "loss": 0.3446, "step": 152 }, { "epoch": 0.07234042553191489, "grad_norm": 4.743616580963135, "learning_rate": 1.4433962264150945e-05, "loss": 0.2665, "step": 153 }, { "epoch": 0.07281323877068557, "grad_norm": 5.460316181182861, "learning_rate": 1.4528301886792452e-05, "loss": 0.4369, "step": 154 }, { "epoch": 0.07328605200945626, "grad_norm": 7.11004638671875, "learning_rate": 1.4622641509433963e-05, "loss": 0.3813, "step": 155 }, { "epoch": 0.07375886524822695, "grad_norm": 6.461905479431152, "learning_rate": 1.4716981132075472e-05, "loss": 0.3413, "step": 156 }, { "epoch": 0.07423167848699763, "grad_norm": 6.668741226196289, "learning_rate": 1.4811320754716983e-05, "loss": 0.425, "step": 157 }, { "epoch": 0.07470449172576832, "grad_norm": 7.922025203704834, "learning_rate": 1.4905660377358491e-05, "loss": 0.3927, "step": 158 }, { "epoch": 0.075177304964539, "grad_norm": 5.079823017120361, "learning_rate": 1.5000000000000002e-05, "loss": 0.34, "step": 159 }, { "epoch": 0.07565011820330969, "grad_norm": 5.172731876373291, "learning_rate": 1.5094339622641511e-05, "loss": 0.3899, "step": 160 }, { "epoch": 0.07565011820330969, "eval_accuracy": 0.7937915742793792, "eval_f1": 0.5181347150259067, "eval_loss": 0.4403148889541626, "eval_precision": 0.7518796992481203, "eval_recall": 0.3952569169960474, "eval_runtime": 48.1121, "eval_samples_per_second": 5.737, "eval_steps_per_second": 0.187, "step": 160 }, { "epoch": 0.07612293144208038, "grad_norm": 6.415732383728027, "learning_rate": 1.518867924528302e-05, "loss": 0.3399, "step": 161 }, { "epoch": 0.07659574468085106, "grad_norm": 6.380404472351074, "learning_rate": 1.5283018867924532e-05, "loss": 0.4384, "step": 162 }, { "epoch": 0.07706855791962175, "grad_norm": 4.902369976043701, "learning_rate": 1.5377358490566038e-05, "loss": 0.3413, "step": 163 }, { "epoch": 0.07754137115839244, "grad_norm": 5.686254024505615, "learning_rate": 1.547169811320755e-05, "loss": 0.4252, "step": 164 }, { "epoch": 0.07801418439716312, "grad_norm": 6.227957725524902, "learning_rate": 1.556603773584906e-05, "loss": 0.3132, "step": 165 }, { "epoch": 0.07848699763593381, "grad_norm": 8.092106819152832, "learning_rate": 1.5660377358490568e-05, "loss": 0.5402, "step": 166 }, { "epoch": 0.0789598108747045, "grad_norm": 6.633399486541748, "learning_rate": 1.5754716981132077e-05, "loss": 0.3574, "step": 167 }, { "epoch": 0.07943262411347518, "grad_norm": 7.712852954864502, "learning_rate": 1.5849056603773586e-05, "loss": 0.435, "step": 168 }, { "epoch": 0.07990543735224587, "grad_norm": 4.21342134475708, "learning_rate": 1.5943396226415095e-05, "loss": 0.3245, "step": 169 }, { "epoch": 0.08037825059101655, "grad_norm": 4.879771709442139, "learning_rate": 1.6037735849056607e-05, "loss": 0.2535, "step": 170 }, { "epoch": 0.08085106382978724, "grad_norm": 7.206470966339111, "learning_rate": 1.6132075471698116e-05, "loss": 0.3628, "step": 171 }, { "epoch": 0.08132387706855793, "grad_norm": 4.479485034942627, "learning_rate": 1.6226415094339625e-05, "loss": 0.3318, "step": 172 }, { "epoch": 0.0817966903073286, "grad_norm": 6.472604751586914, "learning_rate": 1.6320754716981134e-05, "loss": 0.4404, "step": 173 }, { "epoch": 0.08226950354609928, "grad_norm": 4.094892501831055, "learning_rate": 1.6415094339622643e-05, "loss": 0.2986, "step": 174 }, { "epoch": 0.08274231678486997, "grad_norm": 5.433969974517822, "learning_rate": 1.650943396226415e-05, "loss": 0.3356, "step": 175 }, { "epoch": 0.08321513002364066, "grad_norm": 5.57079553604126, "learning_rate": 1.6603773584905664e-05, "loss": 0.3288, "step": 176 }, { "epoch": 0.08368794326241134, "grad_norm": 5.4054460525512695, "learning_rate": 1.669811320754717e-05, "loss": 0.3688, "step": 177 }, { "epoch": 0.08416075650118203, "grad_norm": 6.414549350738525, "learning_rate": 1.679245283018868e-05, "loss": 0.3791, "step": 178 }, { "epoch": 0.08463356973995272, "grad_norm": 6.032560348510742, "learning_rate": 1.688679245283019e-05, "loss": 0.3142, "step": 179 }, { "epoch": 0.0851063829787234, "grad_norm": 6.080160617828369, "learning_rate": 1.69811320754717e-05, "loss": 0.3429, "step": 180 }, { "epoch": 0.0851063829787234, "eval_accuracy": 0.8159645232815964, "eval_f1": 0.5951219512195122, "eval_loss": 0.4055093824863434, "eval_precision": 0.7770700636942676, "eval_recall": 0.48221343873517786, "eval_runtime": 47.3649, "eval_samples_per_second": 5.827, "eval_steps_per_second": 0.19, "step": 180 }, { "epoch": 0.08557919621749409, "grad_norm": 5.409966468811035, "learning_rate": 1.707547169811321e-05, "loss": 0.2881, "step": 181 }, { "epoch": 0.08605200945626477, "grad_norm": 7.760888576507568, "learning_rate": 1.716981132075472e-05, "loss": 0.4003, "step": 182 }, { "epoch": 0.08652482269503546, "grad_norm": 6.271183013916016, "learning_rate": 1.7264150943396226e-05, "loss": 0.3343, "step": 183 }, { "epoch": 0.08699763593380615, "grad_norm": 7.139448165893555, "learning_rate": 1.735849056603774e-05, "loss": 0.3246, "step": 184 }, { "epoch": 0.08747044917257683, "grad_norm": 5.225427627563477, "learning_rate": 1.7452830188679247e-05, "loss": 0.3042, "step": 185 }, { "epoch": 0.08794326241134752, "grad_norm": 7.858066558837891, "learning_rate": 1.7547169811320756e-05, "loss": 0.3843, "step": 186 }, { "epoch": 0.0884160756501182, "grad_norm": 7.103234767913818, "learning_rate": 1.7641509433962265e-05, "loss": 0.3878, "step": 187 }, { "epoch": 0.08888888888888889, "grad_norm": 8.041577339172363, "learning_rate": 1.7735849056603774e-05, "loss": 0.4482, "step": 188 }, { "epoch": 0.08936170212765958, "grad_norm": 6.207291126251221, "learning_rate": 1.7830188679245283e-05, "loss": 0.3709, "step": 189 }, { "epoch": 0.08983451536643026, "grad_norm": 5.278400897979736, "learning_rate": 1.7924528301886795e-05, "loss": 0.2548, "step": 190 }, { "epoch": 0.09030732860520095, "grad_norm": 6.8568854331970215, "learning_rate": 1.8018867924528304e-05, "loss": 0.3724, "step": 191 }, { "epoch": 0.09078014184397164, "grad_norm": 6.631660461425781, "learning_rate": 1.8113207547169813e-05, "loss": 0.3654, "step": 192 }, { "epoch": 0.09125295508274232, "grad_norm": 7.872669696807861, "learning_rate": 1.8207547169811322e-05, "loss": 0.3598, "step": 193 }, { "epoch": 0.091725768321513, "grad_norm": 5.30977725982666, "learning_rate": 1.830188679245283e-05, "loss": 0.316, "step": 194 }, { "epoch": 0.09219858156028368, "grad_norm": 6.427607536315918, "learning_rate": 1.839622641509434e-05, "loss": 0.3215, "step": 195 }, { "epoch": 0.09267139479905437, "grad_norm": 6.099403381347656, "learning_rate": 1.8490566037735852e-05, "loss": 0.3482, "step": 196 }, { "epoch": 0.09314420803782505, "grad_norm": 5.679231643676758, "learning_rate": 1.8584905660377358e-05, "loss": 0.3189, "step": 197 }, { "epoch": 0.09361702127659574, "grad_norm": 7.309570789337158, "learning_rate": 1.867924528301887e-05, "loss": 0.4746, "step": 198 }, { "epoch": 0.09408983451536643, "grad_norm": 8.899137496948242, "learning_rate": 1.877358490566038e-05, "loss": 0.5097, "step": 199 }, { "epoch": 0.09456264775413711, "grad_norm": 5.904891014099121, "learning_rate": 1.8867924528301888e-05, "loss": 0.3529, "step": 200 }, { "epoch": 0.09456264775413711, "eval_accuracy": 0.8181818181818182, "eval_f1": 0.6255707762557078, "eval_loss": 0.38472801446914673, "eval_precision": 0.7405405405405405, "eval_recall": 0.541501976284585, "eval_runtime": 48.2579, "eval_samples_per_second": 5.719, "eval_steps_per_second": 0.186, "step": 200 }, { "epoch": 0.0950354609929078, "grad_norm": 5.29753303527832, "learning_rate": 1.8962264150943397e-05, "loss": 0.2815, "step": 201 }, { "epoch": 0.09550827423167849, "grad_norm": 8.292261123657227, "learning_rate": 1.905660377358491e-05, "loss": 0.3791, "step": 202 }, { "epoch": 0.09598108747044917, "grad_norm": 5.616471290588379, "learning_rate": 1.9150943396226415e-05, "loss": 0.3434, "step": 203 }, { "epoch": 0.09645390070921986, "grad_norm": 4.834171772003174, "learning_rate": 1.9245283018867927e-05, "loss": 0.3383, "step": 204 }, { "epoch": 0.09692671394799054, "grad_norm": 6.36716890335083, "learning_rate": 1.9339622641509436e-05, "loss": 0.3548, "step": 205 }, { "epoch": 0.09739952718676123, "grad_norm": 5.7878899574279785, "learning_rate": 1.9433962264150945e-05, "loss": 0.3652, "step": 206 }, { "epoch": 0.09787234042553192, "grad_norm": 5.697458267211914, "learning_rate": 1.9528301886792454e-05, "loss": 0.2581, "step": 207 }, { "epoch": 0.0983451536643026, "grad_norm": 4.944214344024658, "learning_rate": 1.9622641509433963e-05, "loss": 0.3725, "step": 208 }, { "epoch": 0.09881796690307329, "grad_norm": 5.800679683685303, "learning_rate": 1.971698113207547e-05, "loss": 0.2957, "step": 209 }, { "epoch": 0.09929078014184398, "grad_norm": 5.455956935882568, "learning_rate": 1.9811320754716984e-05, "loss": 0.3826, "step": 210 }, { "epoch": 0.09976359338061466, "grad_norm": 4.2240519523620605, "learning_rate": 1.9905660377358493e-05, "loss": 0.276, "step": 211 }, { "epoch": 0.10023640661938535, "grad_norm": 4.200746059417725, "learning_rate": 2e-05, "loss": 0.2807, "step": 212 }, { "epoch": 0.10070921985815603, "grad_norm": 5.269329071044922, "learning_rate": 1.999998637325671e-05, "loss": 0.3351, "step": 213 }, { "epoch": 0.10118203309692671, "grad_norm": 4.950570583343506, "learning_rate": 1.999994549306397e-05, "loss": 0.3145, "step": 214 }, { "epoch": 0.1016548463356974, "grad_norm": 6.465134143829346, "learning_rate": 1.9999877359533202e-05, "loss": 0.351, "step": 215 }, { "epoch": 0.10212765957446808, "grad_norm": 6.148433685302734, "learning_rate": 1.9999781972850082e-05, "loss": 0.3563, "step": 216 }, { "epoch": 0.10260047281323877, "grad_norm": 4.79353666305542, "learning_rate": 1.9999659333274582e-05, "loss": 0.2827, "step": 217 }, { "epoch": 0.10307328605200945, "grad_norm": 5.91294002532959, "learning_rate": 1.9999509441140934e-05, "loss": 0.3741, "step": 218 }, { "epoch": 0.10354609929078014, "grad_norm": 6.508899688720703, "learning_rate": 1.9999332296857642e-05, "loss": 0.3454, "step": 219 }, { "epoch": 0.10401891252955082, "grad_norm": 6.511887073516846, "learning_rate": 1.9999127900907496e-05, "loss": 0.36, "step": 220 }, { "epoch": 0.10401891252955082, "eval_accuracy": 0.8181818181818182, "eval_f1": 0.6076555023923444, "eval_loss": 0.38239341974258423, "eval_precision": 0.7696969696969697, "eval_recall": 0.5019762845849802, "eval_runtime": 49.0986, "eval_samples_per_second": 5.621, "eval_steps_per_second": 0.183, "step": 220 }, { "epoch": 0.10449172576832151, "grad_norm": 5.294708728790283, "learning_rate": 1.9998896253847536e-05, "loss": 0.345, "step": 221 }, { "epoch": 0.1049645390070922, "grad_norm": 6.348694324493408, "learning_rate": 1.9998637356309088e-05, "loss": 0.2954, "step": 222 }, { "epoch": 0.10543735224586288, "grad_norm": 5.638726234436035, "learning_rate": 1.9998351208997734e-05, "loss": 0.3151, "step": 223 }, { "epoch": 0.10591016548463357, "grad_norm": 6.741607666015625, "learning_rate": 1.999803781269333e-05, "loss": 0.376, "step": 224 }, { "epoch": 0.10638297872340426, "grad_norm": 5.410939693450928, "learning_rate": 1.999769716824998e-05, "loss": 0.2694, "step": 225 }, { "epoch": 0.10685579196217494, "grad_norm": 6.066391944885254, "learning_rate": 1.9997329276596073e-05, "loss": 0.3443, "step": 226 }, { "epoch": 0.10732860520094563, "grad_norm": 6.088653087615967, "learning_rate": 1.999693413873423e-05, "loss": 0.3617, "step": 227 }, { "epoch": 0.10780141843971631, "grad_norm": 6.114930152893066, "learning_rate": 1.9996511755741346e-05, "loss": 0.2752, "step": 228 }, { "epoch": 0.108274231678487, "grad_norm": 5.094395160675049, "learning_rate": 1.999606212876856e-05, "loss": 0.2251, "step": 229 }, { "epoch": 0.10874704491725769, "grad_norm": 6.448328495025635, "learning_rate": 1.999558525904126e-05, "loss": 0.3729, "step": 230 }, { "epoch": 0.10921985815602837, "grad_norm": 5.548649787902832, "learning_rate": 1.9995081147859087e-05, "loss": 0.3591, "step": 231 }, { "epoch": 0.10969267139479906, "grad_norm": 8.929473876953125, "learning_rate": 1.9994549796595913e-05, "loss": 0.3535, "step": 232 }, { "epoch": 0.11016548463356975, "grad_norm": 4.452419757843018, "learning_rate": 1.9993991206699865e-05, "loss": 0.2647, "step": 233 }, { "epoch": 0.11063829787234042, "grad_norm": 4.958791732788086, "learning_rate": 1.999340537969329e-05, "loss": 0.3027, "step": 234 }, { "epoch": 0.1111111111111111, "grad_norm": 6.243503093719482, "learning_rate": 1.9992792317172765e-05, "loss": 0.3199, "step": 235 }, { "epoch": 0.11158392434988179, "grad_norm": 5.826799392700195, "learning_rate": 1.9992152020809113e-05, "loss": 0.3239, "step": 236 }, { "epoch": 0.11205673758865248, "grad_norm": 8.090536117553711, "learning_rate": 1.9991484492347353e-05, "loss": 0.4326, "step": 237 }, { "epoch": 0.11252955082742316, "grad_norm": 7.066166877746582, "learning_rate": 1.9990789733606733e-05, "loss": 0.2672, "step": 238 }, { "epoch": 0.11300236406619385, "grad_norm": 5.99971342086792, "learning_rate": 1.999006774648072e-05, "loss": 0.2984, "step": 239 }, { "epoch": 0.11347517730496454, "grad_norm": 5.290529727935791, "learning_rate": 1.998931853293698e-05, "loss": 0.2875, "step": 240 }, { "epoch": 0.11347517730496454, "eval_accuracy": 0.8226164079822617, "eval_f1": 0.6428571428571429, "eval_loss": 0.3577645719051361, "eval_precision": 0.7384615384615385, "eval_recall": 0.5691699604743083, "eval_runtime": 48.6337, "eval_samples_per_second": 5.675, "eval_steps_per_second": 0.185, "step": 240 }, { "epoch": 0.11394799054373522, "grad_norm": 6.742930889129639, "learning_rate": 1.9988542095017373e-05, "loss": 0.3273, "step": 241 }, { "epoch": 0.11442080378250591, "grad_norm": 6.42619514465332, "learning_rate": 1.9987738434837973e-05, "loss": 0.3949, "step": 242 }, { "epoch": 0.1148936170212766, "grad_norm": 5.628868103027344, "learning_rate": 1.9986907554589024e-05, "loss": 0.2947, "step": 243 }, { "epoch": 0.11536643026004728, "grad_norm": 6.224806308746338, "learning_rate": 1.9986049456534972e-05, "loss": 0.4323, "step": 244 }, { "epoch": 0.11583924349881797, "grad_norm": 8.325421333312988, "learning_rate": 1.9985164143014433e-05, "loss": 0.2801, "step": 245 }, { "epoch": 0.11631205673758865, "grad_norm": 6.2002129554748535, "learning_rate": 1.9984251616440195e-05, "loss": 0.2003, "step": 246 }, { "epoch": 0.11678486997635934, "grad_norm": 6.008997917175293, "learning_rate": 1.9983311879299203e-05, "loss": 0.3376, "step": 247 }, { "epoch": 0.11725768321513003, "grad_norm": 5.332534313201904, "learning_rate": 1.9982344934152577e-05, "loss": 0.3856, "step": 248 }, { "epoch": 0.11773049645390071, "grad_norm": 3.9309067726135254, "learning_rate": 1.9981350783635582e-05, "loss": 0.2775, "step": 249 }, { "epoch": 0.1182033096926714, "grad_norm": 6.058276653289795, "learning_rate": 1.9980329430457616e-05, "loss": 0.2785, "step": 250 }, { "epoch": 0.11867612293144209, "grad_norm": 4.346603870391846, "learning_rate": 1.997928087740222e-05, "loss": 0.2102, "step": 251 }, { "epoch": 0.11914893617021277, "grad_norm": 5.8436408042907715, "learning_rate": 1.9978205127327085e-05, "loss": 0.321, "step": 252 }, { "epoch": 0.11962174940898346, "grad_norm": 8.181411743164062, "learning_rate": 1.9977102183163984e-05, "loss": 0.3408, "step": 253 }, { "epoch": 0.12009456264775414, "grad_norm": 6.579717636108398, "learning_rate": 1.997597204791884e-05, "loss": 0.3031, "step": 254 }, { "epoch": 0.12056737588652482, "grad_norm": 6.751533031463623, "learning_rate": 1.9974814724671658e-05, "loss": 0.3331, "step": 255 }, { "epoch": 0.1210401891252955, "grad_norm": 5.326685905456543, "learning_rate": 1.9973630216576547e-05, "loss": 0.2865, "step": 256 }, { "epoch": 0.12151300236406619, "grad_norm": 6.129054546356201, "learning_rate": 1.9972418526861704e-05, "loss": 0.243, "step": 257 }, { "epoch": 0.12198581560283688, "grad_norm": 4.441340446472168, "learning_rate": 1.997117965882941e-05, "loss": 0.1915, "step": 258 }, { "epoch": 0.12245862884160756, "grad_norm": 6.2238569259643555, "learning_rate": 1.9969913615856015e-05, "loss": 0.3069, "step": 259 }, { "epoch": 0.12293144208037825, "grad_norm": 6.094357967376709, "learning_rate": 1.9968620401391917e-05, "loss": 0.3237, "step": 260 }, { "epoch": 0.12293144208037825, "eval_accuracy": 0.8425720620842572, "eval_f1": 0.710204081632653, "eval_loss": 0.34573158621788025, "eval_precision": 0.7341772151898734, "eval_recall": 0.6877470355731226, "eval_runtime": 49.3073, "eval_samples_per_second": 5.598, "eval_steps_per_second": 0.183, "step": 260 }, { "epoch": 0.12340425531914893, "grad_norm": 7.97859001159668, "learning_rate": 1.9967300018961582e-05, "loss": 0.235, "step": 261 }, { "epoch": 0.12387706855791962, "grad_norm": 8.9214448928833, "learning_rate": 1.9965952472163517e-05, "loss": 0.3719, "step": 262 }, { "epoch": 0.1243498817966903, "grad_norm": 9.066556930541992, "learning_rate": 1.996457776467025e-05, "loss": 0.3064, "step": 263 }, { "epoch": 0.12482269503546099, "grad_norm": 5.2799177169799805, "learning_rate": 1.996317590022834e-05, "loss": 0.3362, "step": 264 }, { "epoch": 0.12529550827423167, "grad_norm": 7.641961574554443, "learning_rate": 1.996174688265836e-05, "loss": 0.326, "step": 265 }, { "epoch": 0.12576832151300235, "grad_norm": 7.163477420806885, "learning_rate": 1.9960290715854874e-05, "loss": 0.2446, "step": 266 }, { "epoch": 0.12624113475177304, "grad_norm": 5.137381553649902, "learning_rate": 1.9958807403786452e-05, "loss": 0.2447, "step": 267 }, { "epoch": 0.12671394799054372, "grad_norm": 8.010651588439941, "learning_rate": 1.995729695049563e-05, "loss": 0.3647, "step": 268 }, { "epoch": 0.1271867612293144, "grad_norm": 5.50241231918335, "learning_rate": 1.995575936009893e-05, "loss": 0.3076, "step": 269 }, { "epoch": 0.1276595744680851, "grad_norm": 5.5639567375183105, "learning_rate": 1.995419463678681e-05, "loss": 0.3547, "step": 270 }, { "epoch": 0.12813238770685578, "grad_norm": 5.540798187255859, "learning_rate": 1.9952602784823688e-05, "loss": 0.385, "step": 271 }, { "epoch": 0.12860520094562647, "grad_norm": 8.423896789550781, "learning_rate": 1.9950983808547923e-05, "loss": 0.2973, "step": 272 }, { "epoch": 0.12907801418439716, "grad_norm": 7.910554885864258, "learning_rate": 1.994933771237179e-05, "loss": 0.232, "step": 273 }, { "epoch": 0.12955082742316784, "grad_norm": 5.810388565063477, "learning_rate": 1.9947664500781464e-05, "loss": 0.3371, "step": 274 }, { "epoch": 0.13002364066193853, "grad_norm": 4.976069450378418, "learning_rate": 1.9945964178337037e-05, "loss": 0.2469, "step": 275 }, { "epoch": 0.13049645390070921, "grad_norm": 5.738739013671875, "learning_rate": 1.9944236749672483e-05, "loss": 0.3208, "step": 276 }, { "epoch": 0.1309692671394799, "grad_norm": 6.816530704498291, "learning_rate": 1.9942482219495644e-05, "loss": 0.327, "step": 277 }, { "epoch": 0.1314420803782506, "grad_norm": 4.750877857208252, "learning_rate": 1.9940700592588228e-05, "loss": 0.2176, "step": 278 }, { "epoch": 0.13191489361702127, "grad_norm": 4.42209529876709, "learning_rate": 1.9938891873805787e-05, "loss": 0.3138, "step": 279 }, { "epoch": 0.13238770685579196, "grad_norm": 3.6869099140167236, "learning_rate": 1.993705606807771e-05, "loss": 0.2309, "step": 280 }, { "epoch": 0.13238770685579196, "eval_accuracy": 0.8203991130820399, "eval_f1": 0.5759162303664922, "eval_loss": 0.36263027787208557, "eval_precision": 0.8527131782945736, "eval_recall": 0.43478260869565216, "eval_runtime": 49.2617, "eval_samples_per_second": 5.603, "eval_steps_per_second": 0.183, "step": 280 }, { "epoch": 0.13286052009456265, "grad_norm": 8.313398361206055, "learning_rate": 1.9935193180407216e-05, "loss": 0.3767, "step": 281 }, { "epoch": 0.13333333333333333, "grad_norm": 3.1473255157470703, "learning_rate": 1.9933303215871313e-05, "loss": 0.21, "step": 282 }, { "epoch": 0.13380614657210402, "grad_norm": 4.254230499267578, "learning_rate": 1.9931386179620816e-05, "loss": 0.2737, "step": 283 }, { "epoch": 0.1342789598108747, "grad_norm": 7.221229553222656, "learning_rate": 1.9929442076880323e-05, "loss": 0.2579, "step": 284 }, { "epoch": 0.1347517730496454, "grad_norm": 5.343625068664551, "learning_rate": 1.9927470912948184e-05, "loss": 0.3152, "step": 285 }, { "epoch": 0.13522458628841608, "grad_norm": 6.4857916831970215, "learning_rate": 1.992547269319651e-05, "loss": 0.3524, "step": 286 }, { "epoch": 0.13569739952718676, "grad_norm": 6.073742389678955, "learning_rate": 1.9923447423071153e-05, "loss": 0.3117, "step": 287 }, { "epoch": 0.13617021276595745, "grad_norm": 6.168652057647705, "learning_rate": 1.992139510809167e-05, "loss": 0.343, "step": 288 }, { "epoch": 0.13664302600472814, "grad_norm": 4.182333469390869, "learning_rate": 1.9919315753851343e-05, "loss": 0.2652, "step": 289 }, { "epoch": 0.13711583924349882, "grad_norm": 5.490057945251465, "learning_rate": 1.9917209366017134e-05, "loss": 0.3377, "step": 290 }, { "epoch": 0.1375886524822695, "grad_norm": 4.765297889709473, "learning_rate": 1.9915075950329683e-05, "loss": 0.2798, "step": 291 }, { "epoch": 0.1380614657210402, "grad_norm": 6.1175923347473145, "learning_rate": 1.9912915512603294e-05, "loss": 0.3759, "step": 292 }, { "epoch": 0.13853427895981088, "grad_norm": 7.154444217681885, "learning_rate": 1.991072805872591e-05, "loss": 0.3252, "step": 293 }, { "epoch": 0.13900709219858157, "grad_norm": 6.086852550506592, "learning_rate": 1.990851359465911e-05, "loss": 0.3158, "step": 294 }, { "epoch": 0.13947990543735225, "grad_norm": 6.656464576721191, "learning_rate": 1.990627212643808e-05, "loss": 0.3349, "step": 295 }, { "epoch": 0.13995271867612294, "grad_norm": 5.9059062004089355, "learning_rate": 1.9904003660171597e-05, "loss": 0.2919, "step": 296 }, { "epoch": 0.14042553191489363, "grad_norm": 7.576990604400635, "learning_rate": 1.990170820204203e-05, "loss": 0.2865, "step": 297 }, { "epoch": 0.1408983451536643, "grad_norm": 5.305886268615723, "learning_rate": 1.9899385758305298e-05, "loss": 0.3499, "step": 298 }, { "epoch": 0.141371158392435, "grad_norm": 4.519371032714844, "learning_rate": 1.9897036335290868e-05, "loss": 0.2928, "step": 299 }, { "epoch": 0.14184397163120568, "grad_norm": 4.735250949859619, "learning_rate": 1.989465993940174e-05, "loss": 0.2843, "step": 300 }, { "epoch": 0.14184397163120568, "eval_accuracy": 0.8325942350332595, "eval_f1": 0.6215538847117794, "eval_loss": 0.35112443566322327, "eval_precision": 0.8493150684931506, "eval_recall": 0.4901185770750988, "eval_runtime": 49.5436, "eval_samples_per_second": 5.571, "eval_steps_per_second": 0.182, "step": 300 }, { "epoch": 0.14231678486997637, "grad_norm": 6.546813488006592, "learning_rate": 1.9892256577114422e-05, "loss": 0.2892, "step": 301 }, { "epoch": 0.14278959810874706, "grad_norm": 10.950983047485352, "learning_rate": 1.9889826254978915e-05, "loss": 0.4064, "step": 302 }, { "epoch": 0.14326241134751774, "grad_norm": 3.705230474472046, "learning_rate": 1.988736897961869e-05, "loss": 0.2045, "step": 303 }, { "epoch": 0.14373522458628843, "grad_norm": 8.16879940032959, "learning_rate": 1.9884884757730683e-05, "loss": 0.4112, "step": 304 }, { "epoch": 0.14420803782505912, "grad_norm": 3.9046270847320557, "learning_rate": 1.988237359608526e-05, "loss": 0.234, "step": 305 }, { "epoch": 0.14468085106382977, "grad_norm": 3.2188122272491455, "learning_rate": 1.987983550152622e-05, "loss": 0.16, "step": 306 }, { "epoch": 0.14515366430260046, "grad_norm": 4.718387603759766, "learning_rate": 1.987727048097075e-05, "loss": 0.2409, "step": 307 }, { "epoch": 0.14562647754137115, "grad_norm": 4.721340656280518, "learning_rate": 1.9874678541409427e-05, "loss": 0.3075, "step": 308 }, { "epoch": 0.14609929078014183, "grad_norm": 8.97851848602295, "learning_rate": 1.9872059689906188e-05, "loss": 0.3628, "step": 309 }, { "epoch": 0.14657210401891252, "grad_norm": 5.3872809410095215, "learning_rate": 1.9869413933598317e-05, "loss": 0.2832, "step": 310 }, { "epoch": 0.1470449172576832, "grad_norm": 6.58519172668457, "learning_rate": 1.986674127969642e-05, "loss": 0.2423, "step": 311 }, { "epoch": 0.1475177304964539, "grad_norm": 6.278075218200684, "learning_rate": 1.9864041735484417e-05, "loss": 0.3341, "step": 312 }, { "epoch": 0.14799054373522458, "grad_norm": 6.8102288246154785, "learning_rate": 1.986131530831951e-05, "loss": 0.2887, "step": 313 }, { "epoch": 0.14846335697399526, "grad_norm": 6.963206768035889, "learning_rate": 1.985856200563215e-05, "loss": 0.2883, "step": 314 }, { "epoch": 0.14893617021276595, "grad_norm": 5.686670303344727, "learning_rate": 1.9855781834926057e-05, "loss": 0.3673, "step": 315 }, { "epoch": 0.14940898345153664, "grad_norm": 5.6203227043151855, "learning_rate": 1.985297480377816e-05, "loss": 0.2601, "step": 316 }, { "epoch": 0.14988179669030732, "grad_norm": 5.668765068054199, "learning_rate": 1.98501409198386e-05, "loss": 0.2868, "step": 317 }, { "epoch": 0.150354609929078, "grad_norm": 4.719535827636719, "learning_rate": 1.9847280190830706e-05, "loss": 0.2295, "step": 318 }, { "epoch": 0.1508274231678487, "grad_norm": 6.404664039611816, "learning_rate": 1.9844392624550952e-05, "loss": 0.3816, "step": 319 }, { "epoch": 0.15130023640661938, "grad_norm": 4.635862827301025, "learning_rate": 1.9841478228868966e-05, "loss": 0.2694, "step": 320 }, { "epoch": 0.15130023640661938, "eval_accuracy": 0.8337028824833703, "eval_f1": 0.6287128712871287, "eval_loss": 0.34866657853126526, "eval_precision": 0.8410596026490066, "eval_recall": 0.5019762845849802, "eval_runtime": 49.2171, "eval_samples_per_second": 5.608, "eval_steps_per_second": 0.183, "step": 320 }, { "epoch": 0.15177304964539007, "grad_norm": 6.7525787353515625, "learning_rate": 1.983853701172749e-05, "loss": 0.3208, "step": 321 }, { "epoch": 0.15224586288416075, "grad_norm": 4.8178019523620605, "learning_rate": 1.9835568981142376e-05, "loss": 0.2365, "step": 322 }, { "epoch": 0.15271867612293144, "grad_norm": 6.369754791259766, "learning_rate": 1.9832574145202524e-05, "loss": 0.3079, "step": 323 }, { "epoch": 0.15319148936170213, "grad_norm": 6.613752365112305, "learning_rate": 1.982955251206993e-05, "loss": 0.3133, "step": 324 }, { "epoch": 0.1536643026004728, "grad_norm": 7.213798999786377, "learning_rate": 1.9826504089979573e-05, "loss": 0.3551, "step": 325 }, { "epoch": 0.1541371158392435, "grad_norm": 5.0920515060424805, "learning_rate": 1.9823428887239484e-05, "loss": 0.2295, "step": 326 }, { "epoch": 0.15460992907801419, "grad_norm": 6.177611827850342, "learning_rate": 1.9820326912230654e-05, "loss": 0.3048, "step": 327 }, { "epoch": 0.15508274231678487, "grad_norm": 6.256964683532715, "learning_rate": 1.981719817340705e-05, "loss": 0.3382, "step": 328 }, { "epoch": 0.15555555555555556, "grad_norm": 7.9319915771484375, "learning_rate": 1.9814042679295574e-05, "loss": 0.4387, "step": 329 }, { "epoch": 0.15602836879432624, "grad_norm": 5.281505107879639, "learning_rate": 1.981086043849605e-05, "loss": 0.2951, "step": 330 }, { "epoch": 0.15650118203309693, "grad_norm": 6.895681858062744, "learning_rate": 1.9807651459681195e-05, "loss": 0.399, "step": 331 }, { "epoch": 0.15697399527186762, "grad_norm": 6.184955596923828, "learning_rate": 1.9804415751596587e-05, "loss": 0.2605, "step": 332 }, { "epoch": 0.1574468085106383, "grad_norm": 6.7411699295043945, "learning_rate": 1.9801153323060667e-05, "loss": 0.3157, "step": 333 }, { "epoch": 0.157919621749409, "grad_norm": 3.7483937740325928, "learning_rate": 1.9797864182964687e-05, "loss": 0.2806, "step": 334 }, { "epoch": 0.15839243498817968, "grad_norm": 7.106858253479004, "learning_rate": 1.97945483402727e-05, "loss": 0.3, "step": 335 }, { "epoch": 0.15886524822695036, "grad_norm": 6.808032512664795, "learning_rate": 1.9791205804021537e-05, "loss": 0.3269, "step": 336 }, { "epoch": 0.15933806146572105, "grad_norm": 5.708781719207764, "learning_rate": 1.978783658332077e-05, "loss": 0.248, "step": 337 }, { "epoch": 0.15981087470449173, "grad_norm": 4.7683587074279785, "learning_rate": 1.9784440687352708e-05, "loss": 0.2693, "step": 338 }, { "epoch": 0.16028368794326242, "grad_norm": 4.79544734954834, "learning_rate": 1.9781018125372337e-05, "loss": 0.2603, "step": 339 }, { "epoch": 0.1607565011820331, "grad_norm": 8.155563354492188, "learning_rate": 1.9777568906707344e-05, "loss": 0.3854, "step": 340 }, { "epoch": 0.1607565011820331, "eval_accuracy": 0.8192904656319291, "eval_f1": 0.5788113695090439, "eval_loss": 0.35731109976768494, "eval_precision": 0.835820895522388, "eval_recall": 0.4426877470355731, "eval_runtime": 49.5821, "eval_samples_per_second": 5.567, "eval_steps_per_second": 0.182, "step": 340 }, { "epoch": 0.1612293144208038, "grad_norm": 4.6710286140441895, "learning_rate": 1.977409304075805e-05, "loss": 0.2523, "step": 341 }, { "epoch": 0.16170212765957448, "grad_norm": 5.3869781494140625, "learning_rate": 1.97705905369974e-05, "loss": 0.3004, "step": 342 }, { "epoch": 0.16217494089834517, "grad_norm": 4.742314338684082, "learning_rate": 1.976706140497094e-05, "loss": 0.2293, "step": 343 }, { "epoch": 0.16264775413711585, "grad_norm": 7.420506477355957, "learning_rate": 1.9763505654296782e-05, "loss": 0.2997, "step": 344 }, { "epoch": 0.16312056737588654, "grad_norm": 6.123251438140869, "learning_rate": 1.9759923294665588e-05, "loss": 0.2884, "step": 345 }, { "epoch": 0.1635933806146572, "grad_norm": 8.684674263000488, "learning_rate": 1.9756314335840535e-05, "loss": 0.3789, "step": 346 }, { "epoch": 0.16406619385342788, "grad_norm": 6.067385673522949, "learning_rate": 1.97526787876573e-05, "loss": 0.3224, "step": 347 }, { "epoch": 0.16453900709219857, "grad_norm": 5.475379467010498, "learning_rate": 1.9749016660024014e-05, "loss": 0.2177, "step": 348 }, { "epoch": 0.16501182033096926, "grad_norm": 5.6096391677856445, "learning_rate": 1.9745327962921253e-05, "loss": 0.2141, "step": 349 }, { "epoch": 0.16548463356973994, "grad_norm": 6.197654724121094, "learning_rate": 1.9741612706402002e-05, "loss": 0.3054, "step": 350 }, { "epoch": 0.16595744680851063, "grad_norm": 5.453296184539795, "learning_rate": 1.973787090059163e-05, "loss": 0.2404, "step": 351 }, { "epoch": 0.16643026004728131, "grad_norm": 4.969869613647461, "learning_rate": 1.9734102555687868e-05, "loss": 0.2441, "step": 352 }, { "epoch": 0.166903073286052, "grad_norm": 5.547883987426758, "learning_rate": 1.9730307681960763e-05, "loss": 0.255, "step": 353 }, { "epoch": 0.1673758865248227, "grad_norm": 8.373857498168945, "learning_rate": 1.972648628975267e-05, "loss": 0.323, "step": 354 }, { "epoch": 0.16784869976359337, "grad_norm": 9.678048133850098, "learning_rate": 1.9722638389478218e-05, "loss": 0.3685, "step": 355 }, { "epoch": 0.16832151300236406, "grad_norm": 5.831118106842041, "learning_rate": 1.9718763991624277e-05, "loss": 0.2394, "step": 356 }, { "epoch": 0.16879432624113475, "grad_norm": 5.348067760467529, "learning_rate": 1.9714863106749928e-05, "loss": 0.2312, "step": 357 }, { "epoch": 0.16926713947990543, "grad_norm": 5.5273518562316895, "learning_rate": 1.9710935745486447e-05, "loss": 0.2442, "step": 358 }, { "epoch": 0.16973995271867612, "grad_norm": 4.848127365112305, "learning_rate": 1.9706981918537257e-05, "loss": 0.2208, "step": 359 }, { "epoch": 0.1702127659574468, "grad_norm": 5.912291526794434, "learning_rate": 1.970300163667792e-05, "loss": 0.3062, "step": 360 }, { "epoch": 0.1702127659574468, "eval_accuracy": 0.8470066518847007, "eval_f1": 0.7, "eval_loss": 0.326249897480011, "eval_precision": 0.7777777777777778, "eval_recall": 0.6363636363636364, "eval_runtime": 49.3056, "eval_samples_per_second": 5.598, "eval_steps_per_second": 0.183, "step": 360 }, { "epoch": 0.1706855791962175, "grad_norm": 7.104170322418213, "learning_rate": 1.9698994910756092e-05, "loss": 0.2781, "step": 361 }, { "epoch": 0.17115839243498818, "grad_norm": 6.990221977233887, "learning_rate": 1.969496175169149e-05, "loss": 0.2876, "step": 362 }, { "epoch": 0.17163120567375886, "grad_norm": 6.5197014808654785, "learning_rate": 1.9690902170475894e-05, "loss": 0.279, "step": 363 }, { "epoch": 0.17210401891252955, "grad_norm": 4.127284526824951, "learning_rate": 1.9686816178173065e-05, "loss": 0.2238, "step": 364 }, { "epoch": 0.17257683215130024, "grad_norm": 7.10957145690918, "learning_rate": 1.968270378591876e-05, "loss": 0.3302, "step": 365 }, { "epoch": 0.17304964539007092, "grad_norm": 5.990342617034912, "learning_rate": 1.967856500492068e-05, "loss": 0.2445, "step": 366 }, { "epoch": 0.1735224586288416, "grad_norm": 8.701051712036133, "learning_rate": 1.9674399846458455e-05, "loss": 0.3321, "step": 367 }, { "epoch": 0.1739952718676123, "grad_norm": 6.448107719421387, "learning_rate": 1.9670208321883588e-05, "loss": 0.3304, "step": 368 }, { "epoch": 0.17446808510638298, "grad_norm": 6.382139682769775, "learning_rate": 1.966599044261944e-05, "loss": 0.2925, "step": 369 }, { "epoch": 0.17494089834515367, "grad_norm": 5.474351406097412, "learning_rate": 1.9661746220161208e-05, "loss": 0.3041, "step": 370 }, { "epoch": 0.17541371158392435, "grad_norm": 6.983358383178711, "learning_rate": 1.965747566607588e-05, "loss": 0.3657, "step": 371 }, { "epoch": 0.17588652482269504, "grad_norm": 6.531889915466309, "learning_rate": 1.9653178792002203e-05, "loss": 0.2679, "step": 372 }, { "epoch": 0.17635933806146573, "grad_norm": 4.009150505065918, "learning_rate": 1.964885560965065e-05, "loss": 0.2429, "step": 373 }, { "epoch": 0.1768321513002364, "grad_norm": 4.717473983764648, "learning_rate": 1.964450613080341e-05, "loss": 0.2378, "step": 374 }, { "epoch": 0.1773049645390071, "grad_norm": 5.107661724090576, "learning_rate": 1.9640130367314327e-05, "loss": 0.3191, "step": 375 }, { "epoch": 0.17777777777777778, "grad_norm": 6.810859203338623, "learning_rate": 1.963572833110888e-05, "loss": 0.3077, "step": 376 }, { "epoch": 0.17825059101654847, "grad_norm": 4.793877124786377, "learning_rate": 1.9631300034184155e-05, "loss": 0.2426, "step": 377 }, { "epoch": 0.17872340425531916, "grad_norm": 5.632551193237305, "learning_rate": 1.96268454886088e-05, "loss": 0.2767, "step": 378 }, { "epoch": 0.17919621749408984, "grad_norm": 4.865971565246582, "learning_rate": 1.962236470652301e-05, "loss": 0.2907, "step": 379 }, { "epoch": 0.17966903073286053, "grad_norm": 6.953917503356934, "learning_rate": 1.9617857700138477e-05, "loss": 0.2861, "step": 380 }, { "epoch": 0.17966903073286053, "eval_accuracy": 0.8458980044345898, "eval_f1": 0.6774941995359629, "eval_loss": 0.3308302164077759, "eval_precision": 0.8202247191011236, "eval_recall": 0.5770750988142292, "eval_runtime": 49.0662, "eval_samples_per_second": 5.625, "eval_steps_per_second": 0.183, "step": 380 }, { "epoch": 0.18014184397163122, "grad_norm": 8.24282455444336, "learning_rate": 1.9613324481738364e-05, "loss": 0.3452, "step": 381 }, { "epoch": 0.1806146572104019, "grad_norm": 6.349436283111572, "learning_rate": 1.9608765063677272e-05, "loss": 0.3436, "step": 382 }, { "epoch": 0.1810874704491726, "grad_norm": 4.89585018157959, "learning_rate": 1.9604179458381204e-05, "loss": 0.2596, "step": 383 }, { "epoch": 0.18156028368794327, "grad_norm": 5.193378925323486, "learning_rate": 1.9599567678347536e-05, "loss": 0.1857, "step": 384 }, { "epoch": 0.18203309692671396, "grad_norm": 4.266732692718506, "learning_rate": 1.9594929736144978e-05, "loss": 0.1993, "step": 385 }, { "epoch": 0.18250591016548465, "grad_norm": 4.616336822509766, "learning_rate": 1.959026564441353e-05, "loss": 0.2655, "step": 386 }, { "epoch": 0.1829787234042553, "grad_norm": 5.701202392578125, "learning_rate": 1.958557541586448e-05, "loss": 0.2377, "step": 387 }, { "epoch": 0.183451536643026, "grad_norm": 4.910188674926758, "learning_rate": 1.9580859063280326e-05, "loss": 0.2346, "step": 388 }, { "epoch": 0.18392434988179668, "grad_norm": 5.084827899932861, "learning_rate": 1.957611659951478e-05, "loss": 0.2473, "step": 389 }, { "epoch": 0.18439716312056736, "grad_norm": 5.31158971786499, "learning_rate": 1.9571348037492705e-05, "loss": 0.2524, "step": 390 }, { "epoch": 0.18486997635933805, "grad_norm": 5.35557746887207, "learning_rate": 1.9566553390210103e-05, "loss": 0.22, "step": 391 }, { "epoch": 0.18534278959810874, "grad_norm": 7.644250392913818, "learning_rate": 1.9561732670734048e-05, "loss": 0.3009, "step": 392 }, { "epoch": 0.18581560283687942, "grad_norm": 9.144373893737793, "learning_rate": 1.9556885892202685e-05, "loss": 0.4346, "step": 393 }, { "epoch": 0.1862884160756501, "grad_norm": 6.692631721496582, "learning_rate": 1.9552013067825185e-05, "loss": 0.3075, "step": 394 }, { "epoch": 0.1867612293144208, "grad_norm": 7.828726768493652, "learning_rate": 1.9547114210881683e-05, "loss": 0.3187, "step": 395 }, { "epoch": 0.18723404255319148, "grad_norm": 7.344780445098877, "learning_rate": 1.954218933472327e-05, "loss": 0.3382, "step": 396 }, { "epoch": 0.18770685579196217, "grad_norm": 5.836700439453125, "learning_rate": 1.9537238452771962e-05, "loss": 0.2509, "step": 397 }, { "epoch": 0.18817966903073285, "grad_norm": 6.650071144104004, "learning_rate": 1.953226157852063e-05, "loss": 0.214, "step": 398 }, { "epoch": 0.18865248226950354, "grad_norm": 6.298871040344238, "learning_rate": 1.952725872553299e-05, "loss": 0.2362, "step": 399 }, { "epoch": 0.18912529550827423, "grad_norm": 5.426384449005127, "learning_rate": 1.952222990744357e-05, "loss": 0.2808, "step": 400 }, { "epoch": 0.18912529550827423, "eval_accuracy": 0.8337028824833703, "eval_f1": 0.609375, "eval_loss": 0.35840529203414917, "eval_precision": 0.8931297709923665, "eval_recall": 0.4624505928853755, "eval_runtime": 49.5299, "eval_samples_per_second": 5.572, "eval_steps_per_second": 0.182, "step": 400 }, { "epoch": 0.1895981087470449, "grad_norm": 4.219785213470459, "learning_rate": 1.9517175137957647e-05, "loss": 0.2007, "step": 401 }, { "epoch": 0.1900709219858156, "grad_norm": 4.726499080657959, "learning_rate": 1.9512094430851226e-05, "loss": 0.2333, "step": 402 }, { "epoch": 0.19054373522458629, "grad_norm": 7.44296407699585, "learning_rate": 1.9506987799971013e-05, "loss": 0.2563, "step": 403 }, { "epoch": 0.19101654846335697, "grad_norm": 7.742011547088623, "learning_rate": 1.9501855259234353e-05, "loss": 0.313, "step": 404 }, { "epoch": 0.19148936170212766, "grad_norm": 6.5203657150268555, "learning_rate": 1.9496696822629208e-05, "loss": 0.3372, "step": 405 }, { "epoch": 0.19196217494089834, "grad_norm": 6.323021411895752, "learning_rate": 1.9491512504214123e-05, "loss": 0.2561, "step": 406 }, { "epoch": 0.19243498817966903, "grad_norm": 5.560845851898193, "learning_rate": 1.9486302318118164e-05, "loss": 0.2822, "step": 407 }, { "epoch": 0.19290780141843972, "grad_norm": 5.433687686920166, "learning_rate": 1.9481066278540912e-05, "loss": 0.1501, "step": 408 }, { "epoch": 0.1933806146572104, "grad_norm": 5.543313026428223, "learning_rate": 1.9475804399752397e-05, "loss": 0.2399, "step": 409 }, { "epoch": 0.1938534278959811, "grad_norm": 3.8356525897979736, "learning_rate": 1.9470516696093075e-05, "loss": 0.1458, "step": 410 }, { "epoch": 0.19432624113475178, "grad_norm": 5.613055229187012, "learning_rate": 1.946520318197378e-05, "loss": 0.2665, "step": 411 }, { "epoch": 0.19479905437352246, "grad_norm": 7.673485279083252, "learning_rate": 1.9459863871875694e-05, "loss": 0.3718, "step": 412 }, { "epoch": 0.19527186761229315, "grad_norm": 5.367708683013916, "learning_rate": 1.945449878035029e-05, "loss": 0.1897, "step": 413 }, { "epoch": 0.19574468085106383, "grad_norm": 7.302910804748535, "learning_rate": 1.9449107922019326e-05, "loss": 0.2457, "step": 414 }, { "epoch": 0.19621749408983452, "grad_norm": 6.045085906982422, "learning_rate": 1.944369131157476e-05, "loss": 0.2974, "step": 415 }, { "epoch": 0.1966903073286052, "grad_norm": 7.004913806915283, "learning_rate": 1.9438248963778754e-05, "loss": 0.2723, "step": 416 }, { "epoch": 0.1971631205673759, "grad_norm": 6.820647716522217, "learning_rate": 1.9432780893463594e-05, "loss": 0.3367, "step": 417 }, { "epoch": 0.19763593380614658, "grad_norm": 6.083995819091797, "learning_rate": 1.942728711553168e-05, "loss": 0.265, "step": 418 }, { "epoch": 0.19810874704491727, "grad_norm": 4.812845706939697, "learning_rate": 1.942176764495547e-05, "loss": 0.2567, "step": 419 }, { "epoch": 0.19858156028368795, "grad_norm": 5.881125450134277, "learning_rate": 1.9416222496777453e-05, "loss": 0.2716, "step": 420 }, { "epoch": 0.19858156028368795, "eval_accuracy": 0.852549889135255, "eval_f1": 0.6825775656324582, "eval_loss": 0.33116355538368225, "eval_precision": 0.8614457831325302, "eval_recall": 0.5652173913043478, "eval_runtime": 47.258, "eval_samples_per_second": 5.84, "eval_steps_per_second": 0.19, "step": 420 }, { "epoch": 0.19905437352245864, "grad_norm": 3.609654426574707, "learning_rate": 1.941065168611009e-05, "loss": 0.2053, "step": 421 }, { "epoch": 0.19952718676122932, "grad_norm": 5.121413230895996, "learning_rate": 1.9405055228135777e-05, "loss": 0.2572, "step": 422 }, { "epoch": 0.2, "grad_norm": 6.144900321960449, "learning_rate": 1.9399433138106814e-05, "loss": 0.2689, "step": 423 }, { "epoch": 0.2004728132387707, "grad_norm": 5.300688743591309, "learning_rate": 1.939378543134536e-05, "loss": 0.3146, "step": 424 }, { "epoch": 0.20094562647754138, "grad_norm": 8.221715927124023, "learning_rate": 1.9388112123243386e-05, "loss": 0.2843, "step": 425 }, { "epoch": 0.20141843971631207, "grad_norm": 3.9929358959198, "learning_rate": 1.938241322926263e-05, "loss": 0.2603, "step": 426 }, { "epoch": 0.20189125295508276, "grad_norm": 4.981365203857422, "learning_rate": 1.937668876493457e-05, "loss": 0.3303, "step": 427 }, { "epoch": 0.20236406619385341, "grad_norm": 5.623127460479736, "learning_rate": 1.9370938745860362e-05, "loss": 0.2564, "step": 428 }, { "epoch": 0.2028368794326241, "grad_norm": 5.664045810699463, "learning_rate": 1.9365163187710817e-05, "loss": 0.2749, "step": 429 }, { "epoch": 0.2033096926713948, "grad_norm": 5.24921178817749, "learning_rate": 1.935936210622634e-05, "loss": 0.2835, "step": 430 }, { "epoch": 0.20378250591016547, "grad_norm": 8.841286659240723, "learning_rate": 1.9353535517216908e-05, "loss": 0.3114, "step": 431 }, { "epoch": 0.20425531914893616, "grad_norm": 4.470973491668701, "learning_rate": 1.9347683436562e-05, "loss": 0.2417, "step": 432 }, { "epoch": 0.20472813238770685, "grad_norm": 7.2563276290893555, "learning_rate": 1.934180588021058e-05, "loss": 0.2727, "step": 433 }, { "epoch": 0.20520094562647753, "grad_norm": 6.107417106628418, "learning_rate": 1.933590286418104e-05, "loss": 0.2461, "step": 434 }, { "epoch": 0.20567375886524822, "grad_norm": 10.675925254821777, "learning_rate": 1.932997440456115e-05, "loss": 0.3362, "step": 435 }, { "epoch": 0.2061465721040189, "grad_norm": 6.831496715545654, "learning_rate": 1.932402051750803e-05, "loss": 0.326, "step": 436 }, { "epoch": 0.2066193853427896, "grad_norm": 6.8831095695495605, "learning_rate": 1.9318041219248108e-05, "loss": 0.3411, "step": 437 }, { "epoch": 0.20709219858156028, "grad_norm": 4.773165225982666, "learning_rate": 1.9312036526077055e-05, "loss": 0.2587, "step": 438 }, { "epoch": 0.20756501182033096, "grad_norm": 7.510995864868164, "learning_rate": 1.930600645435974e-05, "loss": 0.2906, "step": 439 }, { "epoch": 0.20803782505910165, "grad_norm": 6.88019323348999, "learning_rate": 1.9299951020530226e-05, "loss": 0.3696, "step": 440 }, { "epoch": 0.20803782505910165, "eval_accuracy": 0.8547671840354767, "eval_f1": 0.7120879120879121, "eval_loss": 0.31960517168045044, "eval_precision": 0.801980198019802, "eval_recall": 0.6403162055335968, "eval_runtime": 47.3456, "eval_samples_per_second": 5.829, "eval_steps_per_second": 0.19, "step": 440 }, { "epoch": 0.20851063829787234, "grad_norm": 4.385389804840088, "learning_rate": 1.929387024109167e-05, "loss": 0.2451, "step": 441 }, { "epoch": 0.20898345153664302, "grad_norm": 5.8849077224731445, "learning_rate": 1.9287764132616323e-05, "loss": 0.2734, "step": 442 }, { "epoch": 0.2094562647754137, "grad_norm": 8.523555755615234, "learning_rate": 1.928163271174546e-05, "loss": 0.3602, "step": 443 }, { "epoch": 0.2099290780141844, "grad_norm": 8.392167091369629, "learning_rate": 1.927547599518934e-05, "loss": 0.286, "step": 444 }, { "epoch": 0.21040189125295508, "grad_norm": 6.212944030761719, "learning_rate": 1.9269293999727156e-05, "loss": 0.2083, "step": 445 }, { "epoch": 0.21087470449172577, "grad_norm": 6.23652982711792, "learning_rate": 1.926308674220701e-05, "loss": 0.259, "step": 446 }, { "epoch": 0.21134751773049645, "grad_norm": 6.646223545074463, "learning_rate": 1.9256854239545833e-05, "loss": 0.325, "step": 447 }, { "epoch": 0.21182033096926714, "grad_norm": 10.316300392150879, "learning_rate": 1.925059650872938e-05, "loss": 0.4394, "step": 448 }, { "epoch": 0.21229314420803783, "grad_norm": 4.443530559539795, "learning_rate": 1.9244313566812138e-05, "loss": 0.2843, "step": 449 }, { "epoch": 0.2127659574468085, "grad_norm": 5.295090198516846, "learning_rate": 1.923800543091732e-05, "loss": 0.2672, "step": 450 }, { "epoch": 0.2132387706855792, "grad_norm": 4.4014482498168945, "learning_rate": 1.9231672118236798e-05, "loss": 0.2578, "step": 451 }, { "epoch": 0.21371158392434988, "grad_norm": 5.532175540924072, "learning_rate": 1.922531364603105e-05, "loss": 0.2718, "step": 452 }, { "epoch": 0.21418439716312057, "grad_norm": 5.192704200744629, "learning_rate": 1.9218930031629134e-05, "loss": 0.2279, "step": 453 }, { "epoch": 0.21465721040189126, "grad_norm": 4.8312458992004395, "learning_rate": 1.921252129242863e-05, "loss": 0.3158, "step": 454 }, { "epoch": 0.21513002364066194, "grad_norm": 4.629521369934082, "learning_rate": 1.9206087445895572e-05, "loss": 0.187, "step": 455 }, { "epoch": 0.21560283687943263, "grad_norm": 5.664729118347168, "learning_rate": 1.9199628509564455e-05, "loss": 0.2869, "step": 456 }, { "epoch": 0.21607565011820332, "grad_norm": 6.123085021972656, "learning_rate": 1.9193144501038116e-05, "loss": 0.2455, "step": 457 }, { "epoch": 0.216548463356974, "grad_norm": 5.794577121734619, "learning_rate": 1.9186635437987746e-05, "loss": 0.2984, "step": 458 }, { "epoch": 0.2170212765957447, "grad_norm": 5.055876731872559, "learning_rate": 1.9180101338152807e-05, "loss": 0.1974, "step": 459 }, { "epoch": 0.21749408983451538, "grad_norm": 5.301477432250977, "learning_rate": 1.9173542219341005e-05, "loss": 0.1911, "step": 460 }, { "epoch": 0.21749408983451538, "eval_accuracy": 0.8425720620842572, "eval_f1": 0.6467661691542289, "eval_loss": 0.34363728761672974, "eval_precision": 0.87248322147651, "eval_recall": 0.5138339920948617, "eval_runtime": 47.279, "eval_samples_per_second": 5.838, "eval_steps_per_second": 0.19, "step": 460 }, { "epoch": 0.21796690307328606, "grad_norm": 3.5169882774353027, "learning_rate": 1.9166958099428227e-05, "loss": 0.2012, "step": 461 }, { "epoch": 0.21843971631205675, "grad_norm": 7.913601875305176, "learning_rate": 1.9160348996358484e-05, "loss": 0.3436, "step": 462 }, { "epoch": 0.21891252955082743, "grad_norm": 6.095242500305176, "learning_rate": 1.9153714928143898e-05, "loss": 0.2419, "step": 463 }, { "epoch": 0.21938534278959812, "grad_norm": 6.3486104011535645, "learning_rate": 1.914705591286461e-05, "loss": 0.2504, "step": 464 }, { "epoch": 0.2198581560283688, "grad_norm": 4.352773189544678, "learning_rate": 1.9140371968668767e-05, "loss": 0.211, "step": 465 }, { "epoch": 0.2203309692671395, "grad_norm": 7.6094889640808105, "learning_rate": 1.9133663113772437e-05, "loss": 0.2995, "step": 466 }, { "epoch": 0.22080378250591018, "grad_norm": 5.553167819976807, "learning_rate": 1.9126929366459596e-05, "loss": 0.1836, "step": 467 }, { "epoch": 0.22127659574468084, "grad_norm": 6.153863906860352, "learning_rate": 1.912017074508205e-05, "loss": 0.2946, "step": 468 }, { "epoch": 0.22174940898345152, "grad_norm": 5.882277011871338, "learning_rate": 1.9113387268059402e-05, "loss": 0.1988, "step": 469 }, { "epoch": 0.2222222222222222, "grad_norm": 8.288911819458008, "learning_rate": 1.910657895387899e-05, "loss": 0.2778, "step": 470 }, { "epoch": 0.2226950354609929, "grad_norm": 8.619799613952637, "learning_rate": 1.9099745821095842e-05, "loss": 0.2995, "step": 471 }, { "epoch": 0.22316784869976358, "grad_norm": 4.235837936401367, "learning_rate": 1.909288788833263e-05, "loss": 0.1523, "step": 472 }, { "epoch": 0.22364066193853427, "grad_norm": 4.533273220062256, "learning_rate": 1.908600517427961e-05, "loss": 0.188, "step": 473 }, { "epoch": 0.22411347517730495, "grad_norm": 6.656558513641357, "learning_rate": 1.9079097697694578e-05, "loss": 0.2706, "step": 474 }, { "epoch": 0.22458628841607564, "grad_norm": 7.253176212310791, "learning_rate": 1.9072165477402813e-05, "loss": 0.2533, "step": 475 }, { "epoch": 0.22505910165484633, "grad_norm": 7.984339237213135, "learning_rate": 1.9065208532297043e-05, "loss": 0.2768, "step": 476 }, { "epoch": 0.225531914893617, "grad_norm": 5.5995259284973145, "learning_rate": 1.9058226881337356e-05, "loss": 0.2397, "step": 477 }, { "epoch": 0.2260047281323877, "grad_norm": 8.392828941345215, "learning_rate": 1.9051220543551193e-05, "loss": 0.235, "step": 478 }, { "epoch": 0.2264775413711584, "grad_norm": 5.6677422523498535, "learning_rate": 1.9044189538033264e-05, "loss": 0.2468, "step": 479 }, { "epoch": 0.22695035460992907, "grad_norm": 5.317750930786133, "learning_rate": 1.903713388394551e-05, "loss": 0.2548, "step": 480 }, { "epoch": 0.22695035460992907, "eval_accuracy": 0.852549889135255, "eval_f1": 0.6795180722891566, "eval_loss": 0.33114132285118103, "eval_precision": 0.8703703703703703, "eval_recall": 0.5573122529644269, "eval_runtime": 48.0304, "eval_samples_per_second": 5.746, "eval_steps_per_second": 0.187, "step": 480 }, { "epoch": 0.22742316784869976, "grad_norm": 6.943238258361816, "learning_rate": 1.9030053600517053e-05, "loss": 0.2657, "step": 481 }, { "epoch": 0.22789598108747045, "grad_norm": 5.950002193450928, "learning_rate": 1.902294870704413e-05, "loss": 0.2934, "step": 482 }, { "epoch": 0.22836879432624113, "grad_norm": 4.295022964477539, "learning_rate": 1.901581922289005e-05, "loss": 0.2139, "step": 483 }, { "epoch": 0.22884160756501182, "grad_norm": 5.326821804046631, "learning_rate": 1.9008665167485154e-05, "loss": 0.2407, "step": 484 }, { "epoch": 0.2293144208037825, "grad_norm": 6.2555036544799805, "learning_rate": 1.9001486560326724e-05, "loss": 0.2723, "step": 485 }, { "epoch": 0.2297872340425532, "grad_norm": 6.824589729309082, "learning_rate": 1.8994283420978975e-05, "loss": 0.3014, "step": 486 }, { "epoch": 0.23026004728132388, "grad_norm": 5.3086628913879395, "learning_rate": 1.8987055769072973e-05, "loss": 0.245, "step": 487 }, { "epoch": 0.23073286052009456, "grad_norm": 5.174909591674805, "learning_rate": 1.8979803624306585e-05, "loss": 0.3507, "step": 488 }, { "epoch": 0.23120567375886525, "grad_norm": 4.394033908843994, "learning_rate": 1.897252700644444e-05, "loss": 0.2182, "step": 489 }, { "epoch": 0.23167848699763594, "grad_norm": 5.750499248504639, "learning_rate": 1.8965225935317854e-05, "loss": 0.2635, "step": 490 }, { "epoch": 0.23215130023640662, "grad_norm": 5.786622047424316, "learning_rate": 1.8957900430824793e-05, "loss": 0.2483, "step": 491 }, { "epoch": 0.2326241134751773, "grad_norm": 8.868030548095703, "learning_rate": 1.895055051292981e-05, "loss": 0.3858, "step": 492 }, { "epoch": 0.233096926713948, "grad_norm": 5.2425408363342285, "learning_rate": 1.8943176201664e-05, "loss": 0.2107, "step": 493 }, { "epoch": 0.23356973995271868, "grad_norm": 4.864797115325928, "learning_rate": 1.8935777517124923e-05, "loss": 0.3114, "step": 494 }, { "epoch": 0.23404255319148937, "grad_norm": 6.035416603088379, "learning_rate": 1.8928354479476577e-05, "loss": 0.2315, "step": 495 }, { "epoch": 0.23451536643026005, "grad_norm": 5.7510857582092285, "learning_rate": 1.8920907108949335e-05, "loss": 0.2423, "step": 496 }, { "epoch": 0.23498817966903074, "grad_norm": 4.192838668823242, "learning_rate": 1.8913435425839865e-05, "loss": 0.2874, "step": 497 }, { "epoch": 0.23546099290780143, "grad_norm": 5.289735317230225, "learning_rate": 1.8905939450511117e-05, "loss": 0.2984, "step": 498 }, { "epoch": 0.2359338061465721, "grad_norm": 7.9628400802612305, "learning_rate": 1.889841920339224e-05, "loss": 0.3416, "step": 499 }, { "epoch": 0.2364066193853428, "grad_norm": 4.809371471405029, "learning_rate": 1.889087470497852e-05, "loss": 0.2501, "step": 500 }, { "epoch": 0.2364066193853428, "eval_accuracy": 0.8481152993348116, "eval_f1": 0.6666666666666666, "eval_loss": 0.3237150013446808, "eval_precision": 0.8670886075949367, "eval_recall": 0.541501976284585, "eval_runtime": 47.4742, "eval_samples_per_second": 5.814, "eval_steps_per_second": 0.19, "step": 500 }, { "epoch": 0.23687943262411348, "grad_norm": 4.674736499786377, "learning_rate": 1.8883305975831357e-05, "loss": 0.1875, "step": 501 }, { "epoch": 0.23735224586288417, "grad_norm": 3.8804233074188232, "learning_rate": 1.8875713036578168e-05, "loss": 0.184, "step": 502 }, { "epoch": 0.23782505910165486, "grad_norm": 6.960356712341309, "learning_rate": 1.886809590791236e-05, "loss": 0.2407, "step": 503 }, { "epoch": 0.23829787234042554, "grad_norm": 6.6463823318481445, "learning_rate": 1.886045461059327e-05, "loss": 0.2633, "step": 504 }, { "epoch": 0.23877068557919623, "grad_norm": 4.191521167755127, "learning_rate": 1.8852789165446094e-05, "loss": 0.218, "step": 505 }, { "epoch": 0.23924349881796692, "grad_norm": 5.596932411193848, "learning_rate": 1.8845099593361844e-05, "loss": 0.2609, "step": 506 }, { "epoch": 0.2397163120567376, "grad_norm": 4.571609973907471, "learning_rate": 1.883738591529728e-05, "loss": 0.1815, "step": 507 }, { "epoch": 0.2401891252955083, "grad_norm": 5.630218505859375, "learning_rate": 1.8829648152274872e-05, "loss": 0.2727, "step": 508 }, { "epoch": 0.24066193853427895, "grad_norm": 8.154533386230469, "learning_rate": 1.8821886325382718e-05, "loss": 0.3277, "step": 509 }, { "epoch": 0.24113475177304963, "grad_norm": 5.654308795928955, "learning_rate": 1.8814100455774504e-05, "loss": 0.2938, "step": 510 }, { "epoch": 0.24160756501182032, "grad_norm": 6.1714677810668945, "learning_rate": 1.8806290564669435e-05, "loss": 0.2314, "step": 511 }, { "epoch": 0.242080378250591, "grad_norm": 5.192854404449463, "learning_rate": 1.879845667335219e-05, "loss": 0.2489, "step": 512 }, { "epoch": 0.2425531914893617, "grad_norm": 7.861010551452637, "learning_rate": 1.8790598803172857e-05, "loss": 0.2815, "step": 513 }, { "epoch": 0.24302600472813238, "grad_norm": 6.233393669128418, "learning_rate": 1.878271697554687e-05, "loss": 0.2584, "step": 514 }, { "epoch": 0.24349881796690306, "grad_norm": 6.324631690979004, "learning_rate": 1.8774811211954954e-05, "loss": 0.3333, "step": 515 }, { "epoch": 0.24397163120567375, "grad_norm": 7.769336700439453, "learning_rate": 1.8766881533943074e-05, "loss": 0.2968, "step": 516 }, { "epoch": 0.24444444444444444, "grad_norm": 6.459347724914551, "learning_rate": 1.875892796312237e-05, "loss": 0.2193, "step": 517 }, { "epoch": 0.24491725768321512, "grad_norm": 4.615235805511475, "learning_rate": 1.875095052116909e-05, "loss": 0.2396, "step": 518 }, { "epoch": 0.2453900709219858, "grad_norm": 4.699162483215332, "learning_rate": 1.874294922982455e-05, "loss": 0.2726, "step": 519 }, { "epoch": 0.2458628841607565, "grad_norm": 6.707888126373291, "learning_rate": 1.8734924110895056e-05, "loss": 0.2936, "step": 520 }, { "epoch": 0.2458628841607565, "eval_accuracy": 0.835920177383592, "eval_f1": 0.6205128205128205, "eval_loss": 0.3496428430080414, "eval_precision": 0.8832116788321168, "eval_recall": 0.4782608695652174, "eval_runtime": 49.5579, "eval_samples_per_second": 5.569, "eval_steps_per_second": 0.182, "step": 520 }, { "epoch": 0.24633569739952718, "grad_norm": 6.290736198425293, "learning_rate": 1.8726875186251856e-05, "loss": 0.2605, "step": 521 }, { "epoch": 0.24680851063829787, "grad_norm": 5.97813606262207, "learning_rate": 1.8718802477831072e-05, "loss": 0.257, "step": 522 }, { "epoch": 0.24728132387706855, "grad_norm": 5.485353469848633, "learning_rate": 1.8710706007633654e-05, "loss": 0.2114, "step": 523 }, { "epoch": 0.24775413711583924, "grad_norm": 4.747553825378418, "learning_rate": 1.8702585797725308e-05, "loss": 0.2579, "step": 524 }, { "epoch": 0.24822695035460993, "grad_norm": 5.752557754516602, "learning_rate": 1.869444187023643e-05, "loss": 0.2706, "step": 525 }, { "epoch": 0.2486997635933806, "grad_norm": 5.672857761383057, "learning_rate": 1.8686274247362067e-05, "loss": 0.23, "step": 526 }, { "epoch": 0.2491725768321513, "grad_norm": 5.739321708679199, "learning_rate": 1.8678082951361837e-05, "loss": 0.2274, "step": 527 }, { "epoch": 0.24964539007092199, "grad_norm": 8.147102355957031, "learning_rate": 1.8669868004559878e-05, "loss": 0.2682, "step": 528 }, { "epoch": 0.25011820330969264, "grad_norm": 4.3418169021606445, "learning_rate": 1.8661629429344782e-05, "loss": 0.2552, "step": 529 }, { "epoch": 0.25059101654846333, "grad_norm": 5.724131107330322, "learning_rate": 1.8653367248169547e-05, "loss": 0.2912, "step": 530 }, { "epoch": 0.251063829787234, "grad_norm": 5.735341548919678, "learning_rate": 1.864508148355149e-05, "loss": 0.2843, "step": 531 }, { "epoch": 0.2515366430260047, "grad_norm": 4.442595481872559, "learning_rate": 1.863677215807221e-05, "loss": 0.2334, "step": 532 }, { "epoch": 0.2520094562647754, "grad_norm": 6.313594341278076, "learning_rate": 1.862843929437751e-05, "loss": 0.3263, "step": 533 }, { "epoch": 0.2524822695035461, "grad_norm": 9.929341316223145, "learning_rate": 1.8620082915177363e-05, "loss": 0.2992, "step": 534 }, { "epoch": 0.25295508274231676, "grad_norm": 4.474004745483398, "learning_rate": 1.8611703043245807e-05, "loss": 0.2582, "step": 535 }, { "epoch": 0.25342789598108745, "grad_norm": 6.404626369476318, "learning_rate": 1.8603299701420915e-05, "loss": 0.2724, "step": 536 }, { "epoch": 0.25390070921985813, "grad_norm": 5.970371723175049, "learning_rate": 1.8594872912604723e-05, "loss": 0.3189, "step": 537 }, { "epoch": 0.2543735224586288, "grad_norm": 6.132597923278809, "learning_rate": 1.858642269976317e-05, "loss": 0.2448, "step": 538 }, { "epoch": 0.2548463356973995, "grad_norm": 4.322042942047119, "learning_rate": 1.8577949085926032e-05, "loss": 0.1853, "step": 539 }, { "epoch": 0.2553191489361702, "grad_norm": 4.78247594833374, "learning_rate": 1.8569452094186863e-05, "loss": 0.2012, "step": 540 }, { "epoch": 0.2553191489361702, "eval_accuracy": 0.8403547671840355, "eval_f1": 0.6470588235294118, "eval_loss": 0.3362201750278473, "eval_precision": 0.8516129032258064, "eval_recall": 0.5217391304347826, "eval_runtime": 48.1646, "eval_samples_per_second": 5.73, "eval_steps_per_second": 0.187, "step": 540 }, { "epoch": 0.2557919621749409, "grad_norm": 5.21437406539917, "learning_rate": 1.8560931747702924e-05, "loss": 0.2784, "step": 541 }, { "epoch": 0.25626477541371157, "grad_norm": 6.169255256652832, "learning_rate": 1.855238806969513e-05, "loss": 0.3128, "step": 542 }, { "epoch": 0.25673758865248225, "grad_norm": 5.7340874671936035, "learning_rate": 1.854382108344799e-05, "loss": 0.2696, "step": 543 }, { "epoch": 0.25721040189125294, "grad_norm": 7.384251594543457, "learning_rate": 1.853523081230952e-05, "loss": 0.2896, "step": 544 }, { "epoch": 0.2576832151300236, "grad_norm": 7.609536170959473, "learning_rate": 1.8526617279691207e-05, "loss": 0.276, "step": 545 }, { "epoch": 0.2581560283687943, "grad_norm": 5.231773853302002, "learning_rate": 1.8517980509067926e-05, "loss": 0.2765, "step": 546 }, { "epoch": 0.258628841607565, "grad_norm": 4.057672023773193, "learning_rate": 1.8509320523977895e-05, "loss": 0.1932, "step": 547 }, { "epoch": 0.2591016548463357, "grad_norm": 6.7243452072143555, "learning_rate": 1.8500637348022594e-05, "loss": 0.1775, "step": 548 }, { "epoch": 0.25957446808510637, "grad_norm": 5.530993461608887, "learning_rate": 1.84919310048667e-05, "loss": 0.2299, "step": 549 }, { "epoch": 0.26004728132387706, "grad_norm": 4.74931001663208, "learning_rate": 1.8483201518238032e-05, "loss": 0.1902, "step": 550 }, { "epoch": 0.26052009456264774, "grad_norm": 6.366207122802734, "learning_rate": 1.847444891192749e-05, "loss": 0.3109, "step": 551 }, { "epoch": 0.26099290780141843, "grad_norm": 8.735250473022461, "learning_rate": 1.8465673209788975e-05, "loss": 0.32, "step": 552 }, { "epoch": 0.2614657210401891, "grad_norm": 5.171599864959717, "learning_rate": 1.8456874435739337e-05, "loss": 0.2245, "step": 553 }, { "epoch": 0.2619385342789598, "grad_norm": 6.582614898681641, "learning_rate": 1.8448052613758297e-05, "loss": 0.2419, "step": 554 }, { "epoch": 0.2624113475177305, "grad_norm": 5.810616970062256, "learning_rate": 1.84392077678884e-05, "loss": 0.2402, "step": 555 }, { "epoch": 0.2628841607565012, "grad_norm": 6.7433271408081055, "learning_rate": 1.843033992223494e-05, "loss": 0.2887, "step": 556 }, { "epoch": 0.26335697399527186, "grad_norm": 3.9062905311584473, "learning_rate": 1.8421449100965884e-05, "loss": 0.1842, "step": 557 }, { "epoch": 0.26382978723404255, "grad_norm": 5.2100749015808105, "learning_rate": 1.8412535328311813e-05, "loss": 0.2191, "step": 558 }, { "epoch": 0.26430260047281323, "grad_norm": 4.192863941192627, "learning_rate": 1.8403598628565876e-05, "loss": 0.1958, "step": 559 }, { "epoch": 0.2647754137115839, "grad_norm": 8.32767105102539, "learning_rate": 1.839463902608369e-05, "loss": 0.3295, "step": 560 }, { "epoch": 0.2647754137115839, "eval_accuracy": 0.8492239467849224, "eval_f1": 0.6777251184834123, "eval_loss": 0.3414818048477173, "eval_precision": 0.8461538461538461, "eval_recall": 0.5652173913043478, "eval_runtime": 47.9942, "eval_samples_per_second": 5.751, "eval_steps_per_second": 0.188, "step": 560 }, { "epoch": 0.2652482269503546, "grad_norm": 7.091763019561768, "learning_rate": 1.8385656545283296e-05, "loss": 0.3177, "step": 561 }, { "epoch": 0.2657210401891253, "grad_norm": 4.874910354614258, "learning_rate": 1.8376651210645085e-05, "loss": 0.255, "step": 562 }, { "epoch": 0.266193853427896, "grad_norm": 5.0358710289001465, "learning_rate": 1.836762304671174e-05, "loss": 0.2031, "step": 563 }, { "epoch": 0.26666666666666666, "grad_norm": 6.5243330001831055, "learning_rate": 1.8358572078088144e-05, "loss": 0.2583, "step": 564 }, { "epoch": 0.26713947990543735, "grad_norm": 5.2152862548828125, "learning_rate": 1.8349498329441355e-05, "loss": 0.191, "step": 565 }, { "epoch": 0.26761229314420804, "grad_norm": 4.507115840911865, "learning_rate": 1.8340401825500496e-05, "loss": 0.1649, "step": 566 }, { "epoch": 0.2680851063829787, "grad_norm": 4.836716651916504, "learning_rate": 1.833128259105671e-05, "loss": 0.2041, "step": 567 }, { "epoch": 0.2685579196217494, "grad_norm": 5.212822437286377, "learning_rate": 1.832214065096309e-05, "loss": 0.2043, "step": 568 }, { "epoch": 0.2690307328605201, "grad_norm": 9.472026824951172, "learning_rate": 1.8312976030134613e-05, "loss": 0.377, "step": 569 }, { "epoch": 0.2695035460992908, "grad_norm": 7.958829879760742, "learning_rate": 1.8303788753548065e-05, "loss": 0.2281, "step": 570 }, { "epoch": 0.26997635933806147, "grad_norm": 6.508796215057373, "learning_rate": 1.829457884624198e-05, "loss": 0.1819, "step": 571 }, { "epoch": 0.27044917257683215, "grad_norm": 7.919535160064697, "learning_rate": 1.8285346333316564e-05, "loss": 0.2852, "step": 572 }, { "epoch": 0.27092198581560284, "grad_norm": 5.031895637512207, "learning_rate": 1.8276091239933634e-05, "loss": 0.1962, "step": 573 }, { "epoch": 0.2713947990543735, "grad_norm": 5.484118461608887, "learning_rate": 1.8266813591316548e-05, "loss": 0.2812, "step": 574 }, { "epoch": 0.2718676122931442, "grad_norm": 6.0553460121154785, "learning_rate": 1.825751341275013e-05, "loss": 0.2536, "step": 575 }, { "epoch": 0.2723404255319149, "grad_norm": 6.331978797912598, "learning_rate": 1.8248190729580613e-05, "loss": 0.2043, "step": 576 }, { "epoch": 0.2728132387706856, "grad_norm": 5.793010711669922, "learning_rate": 1.8238845567215554e-05, "loss": 0.2921, "step": 577 }, { "epoch": 0.27328605200945627, "grad_norm": 8.16348648071289, "learning_rate": 1.8229477951123785e-05, "loss": 0.3131, "step": 578 }, { "epoch": 0.27375886524822696, "grad_norm": 4.672780513763428, "learning_rate": 1.822008790683532e-05, "loss": 0.2961, "step": 579 }, { "epoch": 0.27423167848699764, "grad_norm": 5.705632209777832, "learning_rate": 1.8210675459941306e-05, "loss": 0.2859, "step": 580 }, { "epoch": 0.27423167848699764, "eval_accuracy": 0.843680709534368, "eval_f1": 0.6501240694789082, "eval_loss": 0.336950421333313, "eval_precision": 0.8733333333333333, "eval_recall": 0.5177865612648221, "eval_runtime": 48.0605, "eval_samples_per_second": 5.743, "eval_steps_per_second": 0.187, "step": 580 }, { "epoch": 0.27470449172576833, "grad_norm": 6.904634952545166, "learning_rate": 1.8201240636093948e-05, "loss": 0.2677, "step": 581 }, { "epoch": 0.275177304964539, "grad_norm": 6.0656328201293945, "learning_rate": 1.819178346100642e-05, "loss": 0.3325, "step": 582 }, { "epoch": 0.2756501182033097, "grad_norm": 4.885197162628174, "learning_rate": 1.8182303960452826e-05, "loss": 0.2458, "step": 583 }, { "epoch": 0.2761229314420804, "grad_norm": 5.368473052978516, "learning_rate": 1.8172802160268116e-05, "loss": 0.2929, "step": 584 }, { "epoch": 0.2765957446808511, "grad_norm": 4.78055477142334, "learning_rate": 1.8163278086347998e-05, "loss": 0.2534, "step": 585 }, { "epoch": 0.27706855791962176, "grad_norm": 8.146903038024902, "learning_rate": 1.8153731764648907e-05, "loss": 0.2733, "step": 586 }, { "epoch": 0.27754137115839245, "grad_norm": 3.793304681777954, "learning_rate": 1.8144163221187882e-05, "loss": 0.2232, "step": 587 }, { "epoch": 0.27801418439716313, "grad_norm": 4.330966949462891, "learning_rate": 1.8134572482042555e-05, "loss": 0.2709, "step": 588 }, { "epoch": 0.2784869976359338, "grad_norm": 3.585026264190674, "learning_rate": 1.8124959573351023e-05, "loss": 0.1779, "step": 589 }, { "epoch": 0.2789598108747045, "grad_norm": 5.1665754318237305, "learning_rate": 1.8115324521311823e-05, "loss": 0.2599, "step": 590 }, { "epoch": 0.2794326241134752, "grad_norm": 4.275265216827393, "learning_rate": 1.8105667352183823e-05, "loss": 0.1805, "step": 591 }, { "epoch": 0.2799054373522459, "grad_norm": 5.761347770690918, "learning_rate": 1.809598809228618e-05, "loss": 0.2043, "step": 592 }, { "epoch": 0.28037825059101656, "grad_norm": 4.9206647872924805, "learning_rate": 1.8086286767998253e-05, "loss": 0.2351, "step": 593 }, { "epoch": 0.28085106382978725, "grad_norm": 5.019208908081055, "learning_rate": 1.807656340575953e-05, "loss": 0.234, "step": 594 }, { "epoch": 0.28132387706855794, "grad_norm": 6.9271626472473145, "learning_rate": 1.8066818032069566e-05, "loss": 0.3302, "step": 595 }, { "epoch": 0.2817966903073286, "grad_norm": 6.962562084197998, "learning_rate": 1.80570506734879e-05, "loss": 0.265, "step": 596 }, { "epoch": 0.2822695035460993, "grad_norm": 6.923299789428711, "learning_rate": 1.804726135663399e-05, "loss": 0.27, "step": 597 }, { "epoch": 0.28274231678487, "grad_norm": 7.9528422355651855, "learning_rate": 1.803745010818714e-05, "loss": 0.2644, "step": 598 }, { "epoch": 0.2832151300236407, "grad_norm": 6.256555557250977, "learning_rate": 1.802761695488642e-05, "loss": 0.296, "step": 599 }, { "epoch": 0.28368794326241137, "grad_norm": 8.758502006530762, "learning_rate": 1.8017761923530602e-05, "loss": 0.2655, "step": 600 }, { "epoch": 0.28368794326241137, "eval_accuracy": 0.8492239467849224, "eval_f1": 0.6822429906542056, "eval_loss": 0.32478421926498413, "eval_precision": 0.8342857142857143, "eval_recall": 0.5770750988142292, "eval_runtime": 48.6007, "eval_samples_per_second": 5.679, "eval_steps_per_second": 0.185, "step": 600 }, { "epoch": 0.28416075650118205, "grad_norm": 6.9206929206848145, "learning_rate": 1.8007885040978078e-05, "loss": 0.2534, "step": 601 }, { "epoch": 0.28463356973995274, "grad_norm": 4.878746509552002, "learning_rate": 1.7997986334146808e-05, "loss": 0.2592, "step": 602 }, { "epoch": 0.2851063829787234, "grad_norm": 6.951952934265137, "learning_rate": 1.798806583001421e-05, "loss": 0.3073, "step": 603 }, { "epoch": 0.2855791962174941, "grad_norm": 6.235677242279053, "learning_rate": 1.7978123555617116e-05, "loss": 0.2217, "step": 604 }, { "epoch": 0.2860520094562648, "grad_norm": 5.96851110458374, "learning_rate": 1.7968159538051703e-05, "loss": 0.3361, "step": 605 }, { "epoch": 0.2865248226950355, "grad_norm": 7.12925386428833, "learning_rate": 1.7958173804473373e-05, "loss": 0.2454, "step": 606 }, { "epoch": 0.28699763593380617, "grad_norm": 5.562047481536865, "learning_rate": 1.7948166382096744e-05, "loss": 0.2518, "step": 607 }, { "epoch": 0.28747044917257686, "grad_norm": 5.448677062988281, "learning_rate": 1.793813729819553e-05, "loss": 0.241, "step": 608 }, { "epoch": 0.28794326241134754, "grad_norm": 6.051489353179932, "learning_rate": 1.7928086580102485e-05, "loss": 0.2748, "step": 609 }, { "epoch": 0.28841607565011823, "grad_norm": 5.5659661293029785, "learning_rate": 1.791801425520931e-05, "loss": 0.2435, "step": 610 }, { "epoch": 0.28888888888888886, "grad_norm": 6.0021071434021, "learning_rate": 1.790792035096661e-05, "loss": 0.2785, "step": 611 }, { "epoch": 0.28936170212765955, "grad_norm": 6.288322925567627, "learning_rate": 1.789780489488379e-05, "loss": 0.2959, "step": 612 }, { "epoch": 0.28983451536643023, "grad_norm": 5.30917501449585, "learning_rate": 1.7887667914528996e-05, "loss": 0.1903, "step": 613 }, { "epoch": 0.2903073286052009, "grad_norm": 5.338979721069336, "learning_rate": 1.7877509437529032e-05, "loss": 0.2522, "step": 614 }, { "epoch": 0.2907801418439716, "grad_norm": 9.307381629943848, "learning_rate": 1.7867329491569293e-05, "loss": 0.3809, "step": 615 }, { "epoch": 0.2912529550827423, "grad_norm": 8.209502220153809, "learning_rate": 1.785712810439368e-05, "loss": 0.3395, "step": 616 }, { "epoch": 0.291725768321513, "grad_norm": 4.827156066894531, "learning_rate": 1.7846905303804525e-05, "loss": 0.2298, "step": 617 }, { "epoch": 0.29219858156028367, "grad_norm": 3.754410743713379, "learning_rate": 1.783666111766253e-05, "loss": 0.149, "step": 618 }, { "epoch": 0.29267139479905435, "grad_norm": 5.818631649017334, "learning_rate": 1.782639557388667e-05, "loss": 0.3478, "step": 619 }, { "epoch": 0.29314420803782504, "grad_norm": 4.670815467834473, "learning_rate": 1.781610870045414e-05, "loss": 0.2646, "step": 620 }, { "epoch": 0.29314420803782504, "eval_accuracy": 0.8481152993348116, "eval_f1": 0.6682808716707022, "eval_loss": 0.3289998471736908, "eval_precision": 0.8625, "eval_recall": 0.5454545454545454, "eval_runtime": 47.6704, "eval_samples_per_second": 5.79, "eval_steps_per_second": 0.189, "step": 620 }, { "epoch": 0.2936170212765957, "grad_norm": 4.68503475189209, "learning_rate": 1.780580052540024e-05, "loss": 0.2216, "step": 621 }, { "epoch": 0.2940898345153664, "grad_norm": 8.284623146057129, "learning_rate": 1.7795471076818356e-05, "loss": 0.3633, "step": 622 }, { "epoch": 0.2945626477541371, "grad_norm": 4.425292015075684, "learning_rate": 1.7785120382859832e-05, "loss": 0.2103, "step": 623 }, { "epoch": 0.2950354609929078, "grad_norm": 5.457613468170166, "learning_rate": 1.7774748471733915e-05, "loss": 0.2615, "step": 624 }, { "epoch": 0.29550827423167847, "grad_norm": 7.534078598022461, "learning_rate": 1.776435537170768e-05, "loss": 0.2994, "step": 625 }, { "epoch": 0.29598108747044916, "grad_norm": 5.263175010681152, "learning_rate": 1.7753941111105954e-05, "loss": 0.221, "step": 626 }, { "epoch": 0.29645390070921984, "grad_norm": 6.045238971710205, "learning_rate": 1.7743505718311218e-05, "loss": 0.2957, "step": 627 }, { "epoch": 0.29692671394799053, "grad_norm": 6.148013591766357, "learning_rate": 1.7733049221763565e-05, "loss": 0.2611, "step": 628 }, { "epoch": 0.2973995271867612, "grad_norm": 5.272988796234131, "learning_rate": 1.772257164996059e-05, "loss": 0.2432, "step": 629 }, { "epoch": 0.2978723404255319, "grad_norm": 4.126927375793457, "learning_rate": 1.7712073031457332e-05, "loss": 0.2488, "step": 630 }, { "epoch": 0.2983451536643026, "grad_norm": 5.052836894989014, "learning_rate": 1.770155339486618e-05, "loss": 0.3185, "step": 631 }, { "epoch": 0.2988179669030733, "grad_norm": 6.2589240074157715, "learning_rate": 1.7691012768856817e-05, "loss": 0.2974, "step": 632 }, { "epoch": 0.29929078014184396, "grad_norm": 9.090741157531738, "learning_rate": 1.7680451182156123e-05, "loss": 0.3296, "step": 633 }, { "epoch": 0.29976359338061465, "grad_norm": 4.453991889953613, "learning_rate": 1.7669868663548105e-05, "loss": 0.2429, "step": 634 }, { "epoch": 0.30023640661938533, "grad_norm": 4.265166282653809, "learning_rate": 1.7659265241873815e-05, "loss": 0.2038, "step": 635 }, { "epoch": 0.300709219858156, "grad_norm": 6.1728901863098145, "learning_rate": 1.7648640946031273e-05, "loss": 0.2277, "step": 636 }, { "epoch": 0.3011820330969267, "grad_norm": 3.938297986984253, "learning_rate": 1.7637995804975392e-05, "loss": 0.2425, "step": 637 }, { "epoch": 0.3016548463356974, "grad_norm": 5.709317684173584, "learning_rate": 1.7627329847717888e-05, "loss": 0.2656, "step": 638 }, { "epoch": 0.3021276595744681, "grad_norm": 5.384950160980225, "learning_rate": 1.761664310332722e-05, "loss": 0.3077, "step": 639 }, { "epoch": 0.30260047281323876, "grad_norm": 5.811992645263672, "learning_rate": 1.7605935600928486e-05, "loss": 0.2706, "step": 640 }, { "epoch": 0.30260047281323876, "eval_accuracy": 0.8481152993348116, "eval_f1": 0.6836027713625866, "eval_loss": 0.3192698657512665, "eval_precision": 0.8222222222222222, "eval_recall": 0.5849802371541502, "eval_runtime": 48.4619, "eval_samples_per_second": 5.695, "eval_steps_per_second": 0.186, "step": 640 }, { "epoch": 0.30307328605200945, "grad_norm": 7.0490241050720215, "learning_rate": 1.759520736970337e-05, "loss": 0.3842, "step": 641 }, { "epoch": 0.30354609929078014, "grad_norm": 7.162063121795654, "learning_rate": 1.7584458438890036e-05, "loss": 0.3018, "step": 642 }, { "epoch": 0.3040189125295508, "grad_norm": 7.344574928283691, "learning_rate": 1.757368883778307e-05, "loss": 0.281, "step": 643 }, { "epoch": 0.3044917257683215, "grad_norm": 5.31951379776001, "learning_rate": 1.7562898595733395e-05, "loss": 0.2809, "step": 644 }, { "epoch": 0.3049645390070922, "grad_norm": 6.758824348449707, "learning_rate": 1.7552087742148176e-05, "loss": 0.2333, "step": 645 }, { "epoch": 0.3054373522458629, "grad_norm": 5.954471588134766, "learning_rate": 1.754125630649076e-05, "loss": 0.31, "step": 646 }, { "epoch": 0.30591016548463357, "grad_norm": 5.111174583435059, "learning_rate": 1.753040431828059e-05, "loss": 0.2112, "step": 647 }, { "epoch": 0.30638297872340425, "grad_norm": 3.9539942741394043, "learning_rate": 1.751953180709311e-05, "loss": 0.1611, "step": 648 }, { "epoch": 0.30685579196217494, "grad_norm": 4.952718257904053, "learning_rate": 1.750863880255971e-05, "loss": 0.2869, "step": 649 }, { "epoch": 0.3073286052009456, "grad_norm": 5.200640678405762, "learning_rate": 1.7497725334367627e-05, "loss": 0.2983, "step": 650 }, { "epoch": 0.3078014184397163, "grad_norm": 9.411211013793945, "learning_rate": 1.7486791432259858e-05, "loss": 0.2823, "step": 651 }, { "epoch": 0.308274231678487, "grad_norm": 5.560668468475342, "learning_rate": 1.7475837126035105e-05, "loss": 0.2646, "step": 652 }, { "epoch": 0.3087470449172577, "grad_norm": 4.861005783081055, "learning_rate": 1.746486244554767e-05, "loss": 0.3096, "step": 653 }, { "epoch": 0.30921985815602837, "grad_norm": 2.945842742919922, "learning_rate": 1.7453867420707386e-05, "loss": 0.1699, "step": 654 }, { "epoch": 0.30969267139479906, "grad_norm": 5.4764933586120605, "learning_rate": 1.7442852081479525e-05, "loss": 0.2001, "step": 655 }, { "epoch": 0.31016548463356974, "grad_norm": 5.168087005615234, "learning_rate": 1.743181645788473e-05, "loss": 0.207, "step": 656 }, { "epoch": 0.31063829787234043, "grad_norm": 6.098107814788818, "learning_rate": 1.742076057999892e-05, "loss": 0.2383, "step": 657 }, { "epoch": 0.3111111111111111, "grad_norm": 6.835310935974121, "learning_rate": 1.7409684477953224e-05, "loss": 0.2723, "step": 658 }, { "epoch": 0.3115839243498818, "grad_norm": 8.645564079284668, "learning_rate": 1.739858818193387e-05, "loss": 0.3614, "step": 659 }, { "epoch": 0.3120567375886525, "grad_norm": 5.342036724090576, "learning_rate": 1.738747172218215e-05, "loss": 0.2074, "step": 660 }, { "epoch": 0.3120567375886525, "eval_accuracy": 0.8470066518847007, "eval_f1": 0.655, "eval_loss": 0.3505603075027466, "eval_precision": 0.891156462585034, "eval_recall": 0.5177865612648221, "eval_runtime": 48.6815, "eval_samples_per_second": 5.67, "eval_steps_per_second": 0.185, "step": 660 }, { "epoch": 0.3125295508274232, "grad_norm": 6.081332683563232, "learning_rate": 1.7376335128994276e-05, "loss": 0.3087, "step": 661 }, { "epoch": 0.31300236406619386, "grad_norm": 5.119427680969238, "learning_rate": 1.7365178432721358e-05, "loss": 0.2799, "step": 662 }, { "epoch": 0.31347517730496455, "grad_norm": 7.370607376098633, "learning_rate": 1.7354001663769278e-05, "loss": 0.2989, "step": 663 }, { "epoch": 0.31394799054373523, "grad_norm": 4.7025885581970215, "learning_rate": 1.734280485259863e-05, "loss": 0.2622, "step": 664 }, { "epoch": 0.3144208037825059, "grad_norm": 7.617417812347412, "learning_rate": 1.7331588029724628e-05, "loss": 0.3428, "step": 665 }, { "epoch": 0.3148936170212766, "grad_norm": 4.964621543884277, "learning_rate": 1.7320351225717025e-05, "loss": 0.2216, "step": 666 }, { "epoch": 0.3153664302600473, "grad_norm": 6.546290397644043, "learning_rate": 1.730909447120003e-05, "loss": 0.2092, "step": 667 }, { "epoch": 0.315839243498818, "grad_norm": 6.265383243560791, "learning_rate": 1.7297817796852227e-05, "loss": 0.2734, "step": 668 }, { "epoch": 0.31631205673758866, "grad_norm": 5.259603500366211, "learning_rate": 1.728652123340648e-05, "loss": 0.2409, "step": 669 }, { "epoch": 0.31678486997635935, "grad_norm": 6.892948627471924, "learning_rate": 1.7275204811649865e-05, "loss": 0.311, "step": 670 }, { "epoch": 0.31725768321513004, "grad_norm": 4.608394145965576, "learning_rate": 1.7263868562423577e-05, "loss": 0.2553, "step": 671 }, { "epoch": 0.3177304964539007, "grad_norm": 6.246408462524414, "learning_rate": 1.725251251662285e-05, "loss": 0.2753, "step": 672 }, { "epoch": 0.3182033096926714, "grad_norm": 5.615715026855469, "learning_rate": 1.7241136705196865e-05, "loss": 0.1744, "step": 673 }, { "epoch": 0.3186761229314421, "grad_norm": 4.1983642578125, "learning_rate": 1.7229741159148676e-05, "loss": 0.2054, "step": 674 }, { "epoch": 0.3191489361702128, "grad_norm": 7.475837230682373, "learning_rate": 1.7218325909535118e-05, "loss": 0.2695, "step": 675 }, { "epoch": 0.31962174940898347, "grad_norm": 6.710148334503174, "learning_rate": 1.7206890987466726e-05, "loss": 0.2597, "step": 676 }, { "epoch": 0.32009456264775416, "grad_norm": 5.378614902496338, "learning_rate": 1.7195436424107648e-05, "loss": 0.2669, "step": 677 }, { "epoch": 0.32056737588652484, "grad_norm": 6.92887020111084, "learning_rate": 1.7183962250675568e-05, "loss": 0.3035, "step": 678 }, { "epoch": 0.3210401891252955, "grad_norm": 5.467234134674072, "learning_rate": 1.7172468498441604e-05, "loss": 0.2622, "step": 679 }, { "epoch": 0.3215130023640662, "grad_norm": 5.692148685455322, "learning_rate": 1.7160955198730244e-05, "loss": 0.2825, "step": 680 }, { "epoch": 0.3215130023640662, "eval_accuracy": 0.8281596452328159, "eval_f1": 0.5931758530183727, "eval_loss": 0.35231587290763855, "eval_precision": 0.8828125, "eval_recall": 0.44664031620553357, "eval_runtime": 47.4158, "eval_samples_per_second": 5.821, "eval_steps_per_second": 0.19, "step": 680 }, { "epoch": 0.3219858156028369, "grad_norm": 5.253277778625488, "learning_rate": 1.7149422382919237e-05, "loss": 0.2007, "step": 681 }, { "epoch": 0.3224586288416076, "grad_norm": 5.658674716949463, "learning_rate": 1.7137870082439533e-05, "loss": 0.2242, "step": 682 }, { "epoch": 0.3229314420803783, "grad_norm": 6.746735095977783, "learning_rate": 1.7126298328775175e-05, "loss": 0.3869, "step": 683 }, { "epoch": 0.32340425531914896, "grad_norm": 4.4457173347473145, "learning_rate": 1.711470715346323e-05, "loss": 0.207, "step": 684 }, { "epoch": 0.32387706855791965, "grad_norm": 4.7827935218811035, "learning_rate": 1.7103096588093686e-05, "loss": 0.1964, "step": 685 }, { "epoch": 0.32434988179669033, "grad_norm": 5.443333148956299, "learning_rate": 1.7091466664309385e-05, "loss": 0.2212, "step": 686 }, { "epoch": 0.324822695035461, "grad_norm": 7.0208539962768555, "learning_rate": 1.7079817413805927e-05, "loss": 0.38, "step": 687 }, { "epoch": 0.3252955082742317, "grad_norm": 4.507380485534668, "learning_rate": 1.706814886833158e-05, "loss": 0.2782, "step": 688 }, { "epoch": 0.3257683215130024, "grad_norm": 5.8691301345825195, "learning_rate": 1.7056461059687195e-05, "loss": 0.2178, "step": 689 }, { "epoch": 0.3262411347517731, "grad_norm": 7.219882011413574, "learning_rate": 1.7044754019726127e-05, "loss": 0.2707, "step": 690 }, { "epoch": 0.32671394799054376, "grad_norm": 5.678999900817871, "learning_rate": 1.703302778035415e-05, "loss": 0.258, "step": 691 }, { "epoch": 0.3271867612293144, "grad_norm": 6.334179878234863, "learning_rate": 1.702128237352934e-05, "loss": 0.2489, "step": 692 }, { "epoch": 0.3276595744680851, "grad_norm": 7.485446453094482, "learning_rate": 1.7009517831262034e-05, "loss": 0.3043, "step": 693 }, { "epoch": 0.32813238770685577, "grad_norm": 5.8358354568481445, "learning_rate": 1.6997734185614712e-05, "loss": 0.2401, "step": 694 }, { "epoch": 0.32860520094562645, "grad_norm": 5.5207319259643555, "learning_rate": 1.6985931468701915e-05, "loss": 0.2512, "step": 695 }, { "epoch": 0.32907801418439714, "grad_norm": 5.306708335876465, "learning_rate": 1.6974109712690163e-05, "loss": 0.2479, "step": 696 }, { "epoch": 0.3295508274231678, "grad_norm": 5.970691204071045, "learning_rate": 1.6962268949797862e-05, "loss": 0.2745, "step": 697 }, { "epoch": 0.3300236406619385, "grad_norm": 4.881795883178711, "learning_rate": 1.695040921229522e-05, "loss": 0.1999, "step": 698 }, { "epoch": 0.3304964539007092, "grad_norm": 3.6859960556030273, "learning_rate": 1.6938530532504155e-05, "loss": 0.1434, "step": 699 }, { "epoch": 0.3309692671394799, "grad_norm": 6.081749439239502, "learning_rate": 1.692663294279821e-05, "loss": 0.2718, "step": 700 }, { "epoch": 0.3309692671394799, "eval_accuracy": 0.8270509977827051, "eval_f1": 0.5828877005347594, "eval_loss": 0.3708072304725647, "eval_precision": 0.9008264462809917, "eval_recall": 0.4308300395256917, "eval_runtime": 49.3785, "eval_samples_per_second": 5.589, "eval_steps_per_second": 0.182, "step": 700 }, { "epoch": 0.33144208037825057, "grad_norm": 4.934723377227783, "learning_rate": 1.6914716475602474e-05, "loss": 0.1914, "step": 701 }, { "epoch": 0.33191489361702126, "grad_norm": 6.057024955749512, "learning_rate": 1.690278116339346e-05, "loss": 0.2151, "step": 702 }, { "epoch": 0.33238770685579194, "grad_norm": 5.484874248504639, "learning_rate": 1.689082703869907e-05, "loss": 0.2675, "step": 703 }, { "epoch": 0.33286052009456263, "grad_norm": 7.429450511932373, "learning_rate": 1.687885413409845e-05, "loss": 0.3249, "step": 704 }, { "epoch": 0.3333333333333333, "grad_norm": 7.455477714538574, "learning_rate": 1.6866862482221948e-05, "loss": 0.3455, "step": 705 }, { "epoch": 0.333806146572104, "grad_norm": 6.469564437866211, "learning_rate": 1.685485211575099e-05, "loss": 0.2674, "step": 706 }, { "epoch": 0.3342789598108747, "grad_norm": 4.929532527923584, "learning_rate": 1.684282306741802e-05, "loss": 0.2082, "step": 707 }, { "epoch": 0.3347517730496454, "grad_norm": 5.234260082244873, "learning_rate": 1.6830775370006377e-05, "loss": 0.1776, "step": 708 }, { "epoch": 0.33522458628841606, "grad_norm": 7.192393779754639, "learning_rate": 1.681870905635025e-05, "loss": 0.2546, "step": 709 }, { "epoch": 0.33569739952718675, "grad_norm": 5.962497711181641, "learning_rate": 1.680662415933454e-05, "loss": 0.2344, "step": 710 }, { "epoch": 0.33617021276595743, "grad_norm": 6.120792865753174, "learning_rate": 1.679452071189481e-05, "loss": 0.2552, "step": 711 }, { "epoch": 0.3366430260047281, "grad_norm": 8.747769355773926, "learning_rate": 1.6782398747017176e-05, "loss": 0.3015, "step": 712 }, { "epoch": 0.3371158392434988, "grad_norm": 4.518637657165527, "learning_rate": 1.6770258297738213e-05, "loss": 0.2825, "step": 713 }, { "epoch": 0.3375886524822695, "grad_norm": 6.613455295562744, "learning_rate": 1.6758099397144884e-05, "loss": 0.3259, "step": 714 }, { "epoch": 0.3380614657210402, "grad_norm": 6.923367023468018, "learning_rate": 1.674592207837443e-05, "loss": 0.2302, "step": 715 }, { "epoch": 0.33853427895981086, "grad_norm": 7.601401329040527, "learning_rate": 1.6733726374614287e-05, "loss": 0.2771, "step": 716 }, { "epoch": 0.33900709219858155, "grad_norm": 4.864050388336182, "learning_rate": 1.6721512319102006e-05, "loss": 0.2364, "step": 717 }, { "epoch": 0.33947990543735224, "grad_norm": 4.241363048553467, "learning_rate": 1.670927994512514e-05, "loss": 0.2275, "step": 718 }, { "epoch": 0.3399527186761229, "grad_norm": 9.842682838439941, "learning_rate": 1.6697029286021182e-05, "loss": 0.3548, "step": 719 }, { "epoch": 0.3404255319148936, "grad_norm": 7.171640396118164, "learning_rate": 1.6684760375177442e-05, "loss": 0.2172, "step": 720 }, { "epoch": 0.3404255319148936, "eval_accuracy": 0.8237250554323725, "eval_f1": 0.5667574931880109, "eval_loss": 0.3734827935695648, "eval_precision": 0.9122807017543859, "eval_recall": 0.41106719367588934, "eval_runtime": 47.8437, "eval_samples_per_second": 5.769, "eval_steps_per_second": 0.188, "step": 720 }, { "epoch": 0.3408983451536643, "grad_norm": 5.395704746246338, "learning_rate": 1.667247324603098e-05, "loss": 0.1952, "step": 721 }, { "epoch": 0.341371158392435, "grad_norm": 6.559274196624756, "learning_rate": 1.666016793206851e-05, "loss": 0.2231, "step": 722 }, { "epoch": 0.34184397163120567, "grad_norm": 5.171023368835449, "learning_rate": 1.6647844466826302e-05, "loss": 0.251, "step": 723 }, { "epoch": 0.34231678486997635, "grad_norm": 6.1227898597717285, "learning_rate": 1.6635502883890098e-05, "loss": 0.2674, "step": 724 }, { "epoch": 0.34278959810874704, "grad_norm": 4.9917802810668945, "learning_rate": 1.6623143216895008e-05, "loss": 0.2228, "step": 725 }, { "epoch": 0.3432624113475177, "grad_norm": 4.765135765075684, "learning_rate": 1.661076549952544e-05, "loss": 0.1833, "step": 726 }, { "epoch": 0.3437352245862884, "grad_norm": 5.079737186431885, "learning_rate": 1.6598369765514986e-05, "loss": 0.2315, "step": 727 }, { "epoch": 0.3442080378250591, "grad_norm": 8.060894012451172, "learning_rate": 1.6585956048646345e-05, "loss": 0.3144, "step": 728 }, { "epoch": 0.3446808510638298, "grad_norm": 5.104706287384033, "learning_rate": 1.657352438275122e-05, "loss": 0.2241, "step": 729 }, { "epoch": 0.34515366430260047, "grad_norm": 7.560702323913574, "learning_rate": 1.656107480171024e-05, "loss": 0.2651, "step": 730 }, { "epoch": 0.34562647754137116, "grad_norm": 4.7865190505981445, "learning_rate": 1.6548607339452853e-05, "loss": 0.1864, "step": 731 }, { "epoch": 0.34609929078014184, "grad_norm": 7.309717178344727, "learning_rate": 1.6536122029957237e-05, "loss": 0.2793, "step": 732 }, { "epoch": 0.34657210401891253, "grad_norm": 5.886257171630859, "learning_rate": 1.6523618907250215e-05, "loss": 0.283, "step": 733 }, { "epoch": 0.3470449172576832, "grad_norm": 7.503266334533691, "learning_rate": 1.6511098005407157e-05, "loss": 0.2675, "step": 734 }, { "epoch": 0.3475177304964539, "grad_norm": 6.831967830657959, "learning_rate": 1.6498559358551885e-05, "loss": 0.2302, "step": 735 }, { "epoch": 0.3479905437352246, "grad_norm": 5.59326696395874, "learning_rate": 1.6486003000856587e-05, "loss": 0.2629, "step": 736 }, { "epoch": 0.3484633569739953, "grad_norm": 6.322920799255371, "learning_rate": 1.647342896654171e-05, "loss": 0.3043, "step": 737 }, { "epoch": 0.34893617021276596, "grad_norm": 7.298335552215576, "learning_rate": 1.6460837289875886e-05, "loss": 0.2891, "step": 738 }, { "epoch": 0.34940898345153665, "grad_norm": 5.698408126831055, "learning_rate": 1.6448228005175818e-05, "loss": 0.2265, "step": 739 }, { "epoch": 0.34988179669030733, "grad_norm": 4.371240139007568, "learning_rate": 1.643560114680621e-05, "loss": 0.1876, "step": 740 }, { "epoch": 0.34988179669030733, "eval_accuracy": 0.8392461197339246, "eval_f1": 0.6214099216710183, "eval_loss": 0.3518848717212677, "eval_precision": 0.9153846153846154, "eval_recall": 0.47035573122529645, "eval_runtime": 47.5745, "eval_samples_per_second": 5.801, "eval_steps_per_second": 0.189, "step": 740 }, { "epoch": 0.350354609929078, "grad_norm": 5.097601413726807, "learning_rate": 1.642295674917965e-05, "loss": 0.2459, "step": 741 }, { "epoch": 0.3508274231678487, "grad_norm": 6.104417324066162, "learning_rate": 1.641029484675653e-05, "loss": 0.1901, "step": 742 }, { "epoch": 0.3513002364066194, "grad_norm": 6.226688385009766, "learning_rate": 1.639761547404495e-05, "loss": 0.2534, "step": 743 }, { "epoch": 0.3517730496453901, "grad_norm": 6.888615608215332, "learning_rate": 1.6384918665600623e-05, "loss": 0.2798, "step": 744 }, { "epoch": 0.35224586288416077, "grad_norm": 3.7885279655456543, "learning_rate": 1.6372204456026774e-05, "loss": 0.177, "step": 745 }, { "epoch": 0.35271867612293145, "grad_norm": 7.243451118469238, "learning_rate": 1.6359472879974064e-05, "loss": 0.2581, "step": 746 }, { "epoch": 0.35319148936170214, "grad_norm": 5.321907043457031, "learning_rate": 1.634672397214047e-05, "loss": 0.2978, "step": 747 }, { "epoch": 0.3536643026004728, "grad_norm": 4.163849830627441, "learning_rate": 1.633395776727121e-05, "loss": 0.1865, "step": 748 }, { "epoch": 0.3541371158392435, "grad_norm": 5.830822467803955, "learning_rate": 1.632117430015865e-05, "loss": 0.2759, "step": 749 }, { "epoch": 0.3546099290780142, "grad_norm": 6.779140949249268, "learning_rate": 1.6308373605642192e-05, "loss": 0.2363, "step": 750 }, { "epoch": 0.3550827423167849, "grad_norm": 5.8843770027160645, "learning_rate": 1.629555571860819e-05, "loss": 0.2933, "step": 751 }, { "epoch": 0.35555555555555557, "grad_norm": 4.655647277832031, "learning_rate": 1.628272067398986e-05, "loss": 0.1564, "step": 752 }, { "epoch": 0.35602836879432626, "grad_norm": 5.365359306335449, "learning_rate": 1.626986850676717e-05, "loss": 0.1929, "step": 753 }, { "epoch": 0.35650118203309694, "grad_norm": 4.647514820098877, "learning_rate": 1.625699925196675e-05, "loss": 0.1701, "step": 754 }, { "epoch": 0.35697399527186763, "grad_norm": 5.949582099914551, "learning_rate": 1.624411294466182e-05, "loss": 0.1973, "step": 755 }, { "epoch": 0.3574468085106383, "grad_norm": 6.162478446960449, "learning_rate": 1.623120961997205e-05, "loss": 0.1898, "step": 756 }, { "epoch": 0.357919621749409, "grad_norm": 5.563331604003906, "learning_rate": 1.6218289313063503e-05, "loss": 0.228, "step": 757 }, { "epoch": 0.3583924349881797, "grad_norm": 4.954248428344727, "learning_rate": 1.6205352059148522e-05, "loss": 0.2102, "step": 758 }, { "epoch": 0.3588652482269504, "grad_norm": 5.003850936889648, "learning_rate": 1.619239789348563e-05, "loss": 0.2235, "step": 759 }, { "epoch": 0.35933806146572106, "grad_norm": 7.022822856903076, "learning_rate": 1.6179426851379443e-05, "loss": 0.2788, "step": 760 }, { "epoch": 0.35933806146572106, "eval_accuracy": 0.8348115299334812, "eval_f1": 0.6246851385390428, "eval_loss": 0.3573962152004242, "eval_precision": 0.8611111111111112, "eval_recall": 0.4901185770750988, "eval_runtime": 47.6981, "eval_samples_per_second": 5.786, "eval_steps_per_second": 0.189, "step": 760 }, { "epoch": 0.35981087470449175, "grad_norm": 5.163010597229004, "learning_rate": 1.6166438968180582e-05, "loss": 0.194, "step": 761 }, { "epoch": 0.36028368794326243, "grad_norm": 7.249414920806885, "learning_rate": 1.615343427928555e-05, "loss": 0.2594, "step": 762 }, { "epoch": 0.3607565011820331, "grad_norm": 3.0018277168273926, "learning_rate": 1.614041282013666e-05, "loss": 0.1381, "step": 763 }, { "epoch": 0.3612293144208038, "grad_norm": 5.234002590179443, "learning_rate": 1.6127374626221934e-05, "loss": 0.2252, "step": 764 }, { "epoch": 0.3617021276595745, "grad_norm": 5.7721848487854, "learning_rate": 1.6114319733074986e-05, "loss": 0.3073, "step": 765 }, { "epoch": 0.3621749408983452, "grad_norm": 7.471461296081543, "learning_rate": 1.6101248176274958e-05, "loss": 0.2948, "step": 766 }, { "epoch": 0.36264775413711586, "grad_norm": 6.112615585327148, "learning_rate": 1.6088159991446397e-05, "loss": 0.2433, "step": 767 }, { "epoch": 0.36312056737588655, "grad_norm": 5.112131118774414, "learning_rate": 1.6075055214259174e-05, "loss": 0.1972, "step": 768 }, { "epoch": 0.36359338061465724, "grad_norm": 6.367164611816406, "learning_rate": 1.606193388042837e-05, "loss": 0.2283, "step": 769 }, { "epoch": 0.3640661938534279, "grad_norm": 6.986507892608643, "learning_rate": 1.60487960257142e-05, "loss": 0.233, "step": 770 }, { "epoch": 0.3645390070921986, "grad_norm": 4.459200382232666, "learning_rate": 1.6035641685921895e-05, "loss": 0.1947, "step": 771 }, { "epoch": 0.3650118203309693, "grad_norm": 4.415493965148926, "learning_rate": 1.602247089690162e-05, "loss": 0.1612, "step": 772 }, { "epoch": 0.3654846335697399, "grad_norm": 6.583262920379639, "learning_rate": 1.6009283694548365e-05, "loss": 0.234, "step": 773 }, { "epoch": 0.3659574468085106, "grad_norm": 7.73126745223999, "learning_rate": 1.5996080114801858e-05, "loss": 0.2687, "step": 774 }, { "epoch": 0.3664302600472813, "grad_norm": 6.350796222686768, "learning_rate": 1.598286019364645e-05, "loss": 0.2279, "step": 775 }, { "epoch": 0.366903073286052, "grad_norm": 4.372172832489014, "learning_rate": 1.596962396711104e-05, "loss": 0.1742, "step": 776 }, { "epoch": 0.36737588652482267, "grad_norm": 7.295071125030518, "learning_rate": 1.5956371471268968e-05, "loss": 0.2632, "step": 777 }, { "epoch": 0.36784869976359336, "grad_norm": 7.446830749511719, "learning_rate": 1.5943102742237894e-05, "loss": 0.2026, "step": 778 }, { "epoch": 0.36832151300236404, "grad_norm": 6.453531742095947, "learning_rate": 1.5929817816179733e-05, "loss": 0.3007, "step": 779 }, { "epoch": 0.36879432624113473, "grad_norm": 5.014437198638916, "learning_rate": 1.591651672930054e-05, "loss": 0.305, "step": 780 }, { "epoch": 0.36879432624113473, "eval_accuracy": 0.8580931263858093, "eval_f1": 0.7037037037037037, "eval_loss": 0.31539109349250793, "eval_precision": 0.8491620111731844, "eval_recall": 0.6007905138339921, "eval_runtime": 47.4123, "eval_samples_per_second": 5.821, "eval_steps_per_second": 0.19, "step": 780 }, { "epoch": 0.3692671394799054, "grad_norm": 4.877912521362305, "learning_rate": 1.5903199517850422e-05, "loss": 0.2521, "step": 781 }, { "epoch": 0.3697399527186761, "grad_norm": 5.180963516235352, "learning_rate": 1.5889866218123414e-05, "loss": 0.2296, "step": 782 }, { "epoch": 0.3702127659574468, "grad_norm": 6.379220008850098, "learning_rate": 1.5876516866457412e-05, "loss": 0.252, "step": 783 }, { "epoch": 0.3706855791962175, "grad_norm": 5.103360652923584, "learning_rate": 1.5863151499234053e-05, "loss": 0.2349, "step": 784 }, { "epoch": 0.37115839243498816, "grad_norm": 5.573483943939209, "learning_rate": 1.5849770152878622e-05, "loss": 0.2627, "step": 785 }, { "epoch": 0.37163120567375885, "grad_norm": 4.954294681549072, "learning_rate": 1.583637286385995e-05, "loss": 0.2475, "step": 786 }, { "epoch": 0.37210401891252953, "grad_norm": 8.134040832519531, "learning_rate": 1.5822959668690325e-05, "loss": 0.4456, "step": 787 }, { "epoch": 0.3725768321513002, "grad_norm": 5.643916130065918, "learning_rate": 1.5809530603925378e-05, "loss": 0.3051, "step": 788 }, { "epoch": 0.3730496453900709, "grad_norm": 5.253788471221924, "learning_rate": 1.5796085706163997e-05, "loss": 0.254, "step": 789 }, { "epoch": 0.3735224586288416, "grad_norm": 7.210267543792725, "learning_rate": 1.5782625012048212e-05, "loss": 0.2753, "step": 790 }, { "epoch": 0.3739952718676123, "grad_norm": 4.508166790008545, "learning_rate": 1.5769148558263108e-05, "loss": 0.3106, "step": 791 }, { "epoch": 0.37446808510638296, "grad_norm": 4.896869659423828, "learning_rate": 1.575565638153672e-05, "loss": 0.2384, "step": 792 }, { "epoch": 0.37494089834515365, "grad_norm": 4.60262393951416, "learning_rate": 1.574214851863993e-05, "loss": 0.2742, "step": 793 }, { "epoch": 0.37541371158392434, "grad_norm": 5.893795490264893, "learning_rate": 1.572862500638639e-05, "loss": 0.2526, "step": 794 }, { "epoch": 0.375886524822695, "grad_norm": 4.181454181671143, "learning_rate": 1.5715085881632366e-05, "loss": 0.2012, "step": 795 }, { "epoch": 0.3763593380614657, "grad_norm": 5.785640716552734, "learning_rate": 1.5701531181276703e-05, "loss": 0.2868, "step": 796 }, { "epoch": 0.3768321513002364, "grad_norm": 4.539717197418213, "learning_rate": 1.5687960942260687e-05, "loss": 0.1804, "step": 797 }, { "epoch": 0.3773049645390071, "grad_norm": 4.89398717880249, "learning_rate": 1.5674375201567948e-05, "loss": 0.1735, "step": 798 }, { "epoch": 0.37777777777777777, "grad_norm": 6.449411392211914, "learning_rate": 1.566077399622436e-05, "loss": 0.342, "step": 799 }, { "epoch": 0.37825059101654845, "grad_norm": 4.286831378936768, "learning_rate": 1.5647157363297964e-05, "loss": 0.2726, "step": 800 }, { "epoch": 0.37825059101654845, "eval_accuracy": 0.8458980044345898, "eval_f1": 0.6567901234567901, "eval_loss": 0.3148706555366516, "eval_precision": 0.875, "eval_recall": 0.525691699604743, "eval_runtime": 47.1619, "eval_samples_per_second": 5.852, "eval_steps_per_second": 0.191, "step": 800 }, { "epoch": 0.37872340425531914, "grad_norm": 5.2504563331604, "learning_rate": 1.5633525339898818e-05, "loss": 0.2679, "step": 801 }, { "epoch": 0.3791962174940898, "grad_norm": 5.007554531097412, "learning_rate": 1.5619877963178952e-05, "loss": 0.2399, "step": 802 }, { "epoch": 0.3796690307328605, "grad_norm": 3.274820566177368, "learning_rate": 1.5606215270332216e-05, "loss": 0.1511, "step": 803 }, { "epoch": 0.3801418439716312, "grad_norm": 4.302379131317139, "learning_rate": 1.559253729859421e-05, "loss": 0.2247, "step": 804 }, { "epoch": 0.3806146572104019, "grad_norm": 4.160916805267334, "learning_rate": 1.5578844085242185e-05, "loss": 0.2082, "step": 805 }, { "epoch": 0.38108747044917257, "grad_norm": 5.578160285949707, "learning_rate": 1.5565135667594916e-05, "loss": 0.3049, "step": 806 }, { "epoch": 0.38156028368794326, "grad_norm": 7.35500431060791, "learning_rate": 1.555141208301262e-05, "loss": 0.2808, "step": 807 }, { "epoch": 0.38203309692671394, "grad_norm": 5.54599666595459, "learning_rate": 1.5537673368896853e-05, "loss": 0.2069, "step": 808 }, { "epoch": 0.38250591016548463, "grad_norm": 4.696985721588135, "learning_rate": 1.55239195626904e-05, "loss": 0.2765, "step": 809 }, { "epoch": 0.3829787234042553, "grad_norm": 6.143385410308838, "learning_rate": 1.5510150701877178e-05, "loss": 0.1958, "step": 810 }, { "epoch": 0.383451536643026, "grad_norm": 6.515667915344238, "learning_rate": 1.549636682398213e-05, "loss": 0.2543, "step": 811 }, { "epoch": 0.3839243498817967, "grad_norm": 4.393880367279053, "learning_rate": 1.5482567966571136e-05, "loss": 0.2278, "step": 812 }, { "epoch": 0.3843971631205674, "grad_norm": 8.271415710449219, "learning_rate": 1.546875416725089e-05, "loss": 0.25, "step": 813 }, { "epoch": 0.38486997635933806, "grad_norm": 5.564967155456543, "learning_rate": 1.5454925463668812e-05, "loss": 0.2286, "step": 814 }, { "epoch": 0.38534278959810875, "grad_norm": 4.746275424957275, "learning_rate": 1.5441081893512933e-05, "loss": 0.2164, "step": 815 }, { "epoch": 0.38581560283687943, "grad_norm": 7.916270732879639, "learning_rate": 1.5427223494511824e-05, "loss": 0.3749, "step": 816 }, { "epoch": 0.3862884160756501, "grad_norm": 4.836629867553711, "learning_rate": 1.541335030443444e-05, "loss": 0.1946, "step": 817 }, { "epoch": 0.3867612293144208, "grad_norm": 5.497342586517334, "learning_rate": 1.539946236109007e-05, "loss": 0.2712, "step": 818 }, { "epoch": 0.3872340425531915, "grad_norm": 4.717584133148193, "learning_rate": 1.5385559702328195e-05, "loss": 0.239, "step": 819 }, { "epoch": 0.3877068557919622, "grad_norm": 6.673068046569824, "learning_rate": 1.5371642366038412e-05, "loss": 0.2819, "step": 820 }, { "epoch": 0.3877068557919622, "eval_accuracy": 0.8580931263858093, "eval_f1": 0.7276595744680852, "eval_loss": 0.30154746770858765, "eval_precision": 0.7880184331797235, "eval_recall": 0.6758893280632411, "eval_runtime": 48.4441, "eval_samples_per_second": 5.697, "eval_steps_per_second": 0.186, "step": 820 }, { "epoch": 0.38817966903073287, "grad_norm": 8.295758247375488, "learning_rate": 1.5357710390150312e-05, "loss": 0.2953, "step": 821 }, { "epoch": 0.38865248226950355, "grad_norm": 6.9379730224609375, "learning_rate": 1.5343763812633393e-05, "loss": 0.2614, "step": 822 }, { "epoch": 0.38912529550827424, "grad_norm": 5.640291690826416, "learning_rate": 1.5329802671496935e-05, "loss": 0.2978, "step": 823 }, { "epoch": 0.3895981087470449, "grad_norm": 5.361009120941162, "learning_rate": 1.5315827004789918e-05, "loss": 0.3108, "step": 824 }, { "epoch": 0.3900709219858156, "grad_norm": 5.312415599822998, "learning_rate": 1.53018368506009e-05, "loss": 0.1958, "step": 825 }, { "epoch": 0.3905437352245863, "grad_norm": 4.690582752227783, "learning_rate": 1.5287832247057936e-05, "loss": 0.2102, "step": 826 }, { "epoch": 0.391016548463357, "grad_norm": 5.4220099449157715, "learning_rate": 1.527381323232845e-05, "loss": 0.1965, "step": 827 }, { "epoch": 0.39148936170212767, "grad_norm": 6.630805969238281, "learning_rate": 1.5259779844619152e-05, "loss": 0.2573, "step": 828 }, { "epoch": 0.39196217494089836, "grad_norm": 4.912630081176758, "learning_rate": 1.524573212217591e-05, "loss": 0.2715, "step": 829 }, { "epoch": 0.39243498817966904, "grad_norm": 5.768490314483643, "learning_rate": 1.5231670103283665e-05, "loss": 0.2107, "step": 830 }, { "epoch": 0.39290780141843973, "grad_norm": 11.683192253112793, "learning_rate": 1.521759382626632e-05, "loss": 0.3559, "step": 831 }, { "epoch": 0.3933806146572104, "grad_norm": 6.212742805480957, "learning_rate": 1.5203503329486649e-05, "loss": 0.299, "step": 832 }, { "epoch": 0.3938534278959811, "grad_norm": 11.167441368103027, "learning_rate": 1.5189398651346153e-05, "loss": 0.4404, "step": 833 }, { "epoch": 0.3943262411347518, "grad_norm": 4.801130294799805, "learning_rate": 1.5175279830285006e-05, "loss": 0.2968, "step": 834 }, { "epoch": 0.3947990543735225, "grad_norm": 4.244668006896973, "learning_rate": 1.5161146904781918e-05, "loss": 0.2195, "step": 835 }, { "epoch": 0.39527186761229316, "grad_norm": 4.198855876922607, "learning_rate": 1.514699991335404e-05, "loss": 0.2572, "step": 836 }, { "epoch": 0.39574468085106385, "grad_norm": 3.7614452838897705, "learning_rate": 1.5132838894556848e-05, "loss": 0.2454, "step": 837 }, { "epoch": 0.39621749408983453, "grad_norm": 3.7950305938720703, "learning_rate": 1.5118663886984065e-05, "loss": 0.2254, "step": 838 }, { "epoch": 0.3966903073286052, "grad_norm": 7.833040714263916, "learning_rate": 1.510447492926752e-05, "loss": 0.3283, "step": 839 }, { "epoch": 0.3971631205673759, "grad_norm": 4.6039204597473145, "learning_rate": 1.5090272060077081e-05, "loss": 0.2596, "step": 840 }, { "epoch": 0.3971631205673759, "eval_accuracy": 0.8547671840354767, "eval_f1": 0.7298969072164948, "eval_loss": 0.3099477291107178, "eval_precision": 0.7629310344827587, "eval_recall": 0.6996047430830039, "eval_runtime": 48.4738, "eval_samples_per_second": 5.694, "eval_steps_per_second": 0.186, "step": 840 }, { "epoch": 0.3976359338061466, "grad_norm": 6.1639485359191895, "learning_rate": 1.5076055318120508e-05, "loss": 0.2616, "step": 841 }, { "epoch": 0.3981087470449173, "grad_norm": 5.882129192352295, "learning_rate": 1.5061824742143388e-05, "loss": 0.2296, "step": 842 }, { "epoch": 0.39858156028368796, "grad_norm": 6.4986772537231445, "learning_rate": 1.5047580370928994e-05, "loss": 0.3221, "step": 843 }, { "epoch": 0.39905437352245865, "grad_norm": 4.624194622039795, "learning_rate": 1.5033322243298209e-05, "loss": 0.2522, "step": 844 }, { "epoch": 0.39952718676122934, "grad_norm": 4.8714280128479, "learning_rate": 1.50190503981094e-05, "loss": 0.2503, "step": 845 }, { "epoch": 0.4, "grad_norm": 6.177154541015625, "learning_rate": 1.5004764874258327e-05, "loss": 0.283, "step": 846 }, { "epoch": 0.4004728132387707, "grad_norm": 6.643271446228027, "learning_rate": 1.4990465710678015e-05, "loss": 0.3263, "step": 847 }, { "epoch": 0.4009456264775414, "grad_norm": 5.074257850646973, "learning_rate": 1.4976152946338673e-05, "loss": 0.2613, "step": 848 }, { "epoch": 0.4014184397163121, "grad_norm": 4.194014072418213, "learning_rate": 1.4961826620247574e-05, "loss": 0.221, "step": 849 }, { "epoch": 0.40189125295508277, "grad_norm": 4.170263767242432, "learning_rate": 1.4947486771448955e-05, "loss": 0.2559, "step": 850 }, { "epoch": 0.40236406619385345, "grad_norm": 5.984470844268799, "learning_rate": 1.4933133439023903e-05, "loss": 0.3017, "step": 851 }, { "epoch": 0.40283687943262414, "grad_norm": 3.760006904602051, "learning_rate": 1.4918766662090248e-05, "loss": 0.197, "step": 852 }, { "epoch": 0.4033096926713948, "grad_norm": 4.673705101013184, "learning_rate": 1.4904386479802471e-05, "loss": 0.2784, "step": 853 }, { "epoch": 0.4037825059101655, "grad_norm": 8.06790828704834, "learning_rate": 1.4889992931351578e-05, "loss": 0.2297, "step": 854 }, { "epoch": 0.40425531914893614, "grad_norm": 3.7845892906188965, "learning_rate": 1.4875586055965014e-05, "loss": 0.2101, "step": 855 }, { "epoch": 0.40472813238770683, "grad_norm": 3.8769285678863525, "learning_rate": 1.4861165892906532e-05, "loss": 0.1641, "step": 856 }, { "epoch": 0.4052009456264775, "grad_norm": 3.3952763080596924, "learning_rate": 1.4846732481476105e-05, "loss": 0.1768, "step": 857 }, { "epoch": 0.4056737588652482, "grad_norm": 6.208580493927002, "learning_rate": 1.4832285861009812e-05, "loss": 0.1995, "step": 858 }, { "epoch": 0.4061465721040189, "grad_norm": 6.829061508178711, "learning_rate": 1.4817826070879732e-05, "loss": 0.3429, "step": 859 }, { "epoch": 0.4066193853427896, "grad_norm": 4.501508712768555, "learning_rate": 1.4803353150493834e-05, "loss": 0.185, "step": 860 }, { "epoch": 0.4066193853427896, "eval_accuracy": 0.8614190687361419, "eval_f1": 0.7203579418344519, "eval_loss": 0.3079231381416321, "eval_precision": 0.8298969072164949, "eval_recall": 0.6363636363636364, "eval_runtime": 48.0809, "eval_samples_per_second": 5.74, "eval_steps_per_second": 0.187, "step": 860 }, { "epoch": 0.40709219858156026, "grad_norm": 7.904217720031738, "learning_rate": 1.478886713929587e-05, "loss": 0.2896, "step": 861 }, { "epoch": 0.40756501182033095, "grad_norm": 5.54583740234375, "learning_rate": 1.4774368076765272e-05, "loss": 0.2334, "step": 862 }, { "epoch": 0.40803782505910163, "grad_norm": 6.930192470550537, "learning_rate": 1.4759856002417046e-05, "loss": 0.233, "step": 863 }, { "epoch": 0.4085106382978723, "grad_norm": 7.0124335289001465, "learning_rate": 1.4745330955801644e-05, "loss": 0.2996, "step": 864 }, { "epoch": 0.408983451536643, "grad_norm": 7.793242454528809, "learning_rate": 1.4730792976504892e-05, "loss": 0.1966, "step": 865 }, { "epoch": 0.4094562647754137, "grad_norm": 6.164129734039307, "learning_rate": 1.4716242104147849e-05, "loss": 0.2556, "step": 866 }, { "epoch": 0.4099290780141844, "grad_norm": 5.059127330780029, "learning_rate": 1.470167837838671e-05, "loss": 0.1843, "step": 867 }, { "epoch": 0.41040189125295506, "grad_norm": 7.891740798950195, "learning_rate": 1.4687101838912713e-05, "loss": 0.2942, "step": 868 }, { "epoch": 0.41087470449172575, "grad_norm": 8.02418327331543, "learning_rate": 1.467251252545201e-05, "loss": 0.2544, "step": 869 }, { "epoch": 0.41134751773049644, "grad_norm": 7.103123188018799, "learning_rate": 1.4657910477765564e-05, "loss": 0.2167, "step": 870 }, { "epoch": 0.4118203309692671, "grad_norm": 6.880304336547852, "learning_rate": 1.4643295735649044e-05, "loss": 0.3523, "step": 871 }, { "epoch": 0.4122931442080378, "grad_norm": 5.1397576332092285, "learning_rate": 1.4628668338932721e-05, "loss": 0.2939, "step": 872 }, { "epoch": 0.4127659574468085, "grad_norm": 4.4353346824646, "learning_rate": 1.461402832748135e-05, "loss": 0.2673, "step": 873 }, { "epoch": 0.4132387706855792, "grad_norm": 4.128648281097412, "learning_rate": 1.4599375741194069e-05, "loss": 0.1686, "step": 874 }, { "epoch": 0.41371158392434987, "grad_norm": 5.588024616241455, "learning_rate": 1.4584710620004284e-05, "loss": 0.2412, "step": 875 }, { "epoch": 0.41418439716312055, "grad_norm": 5.182522296905518, "learning_rate": 1.4570033003879556e-05, "loss": 0.2453, "step": 876 }, { "epoch": 0.41465721040189124, "grad_norm": 4.976614475250244, "learning_rate": 1.4555342932821517e-05, "loss": 0.2493, "step": 877 }, { "epoch": 0.4151300236406619, "grad_norm": 6.306532859802246, "learning_rate": 1.4540640446865723e-05, "loss": 0.2481, "step": 878 }, { "epoch": 0.4156028368794326, "grad_norm": 4.86607027053833, "learning_rate": 1.4525925586081584e-05, "loss": 0.1933, "step": 879 }, { "epoch": 0.4160756501182033, "grad_norm": 4.547597885131836, "learning_rate": 1.4511198390572219e-05, "loss": 0.189, "step": 880 }, { "epoch": 0.4160756501182033, "eval_accuracy": 0.8503325942350333, "eval_f1": 0.6666666666666666, "eval_loss": 0.32481706142425537, "eval_precision": 0.8881578947368421, "eval_recall": 0.5335968379446641, "eval_runtime": 48.5726, "eval_samples_per_second": 5.682, "eval_steps_per_second": 0.185, "step": 880 }, { "epoch": 0.416548463356974, "grad_norm": 6.4413886070251465, "learning_rate": 1.4496458900474371e-05, "loss": 0.2284, "step": 881 }, { "epoch": 0.41702127659574467, "grad_norm": 3.7408576011657715, "learning_rate": 1.4481707155958291e-05, "loss": 0.1963, "step": 882 }, { "epoch": 0.41749408983451536, "grad_norm": 5.2726664543151855, "learning_rate": 1.446694319722763e-05, "loss": 0.2463, "step": 883 }, { "epoch": 0.41796690307328604, "grad_norm": 4.192355155944824, "learning_rate": 1.4452167064519316e-05, "loss": 0.2065, "step": 884 }, { "epoch": 0.41843971631205673, "grad_norm": 7.111584663391113, "learning_rate": 1.4437378798103467e-05, "loss": 0.3013, "step": 885 }, { "epoch": 0.4189125295508274, "grad_norm": 7.128089427947998, "learning_rate": 1.4422578438283263e-05, "loss": 0.2477, "step": 886 }, { "epoch": 0.4193853427895981, "grad_norm": 6.053483486175537, "learning_rate": 1.4407766025394847e-05, "loss": 0.2003, "step": 887 }, { "epoch": 0.4198581560283688, "grad_norm": 6.564062118530273, "learning_rate": 1.4392941599807206e-05, "loss": 0.2808, "step": 888 }, { "epoch": 0.4203309692671395, "grad_norm": 4.815242290496826, "learning_rate": 1.4378105201922073e-05, "loss": 0.1874, "step": 889 }, { "epoch": 0.42080378250591016, "grad_norm": 6.174993991851807, "learning_rate": 1.4363256872173801e-05, "loss": 0.1918, "step": 890 }, { "epoch": 0.42127659574468085, "grad_norm": 7.473939418792725, "learning_rate": 1.4348396651029261e-05, "loss": 0.2361, "step": 891 }, { "epoch": 0.42174940898345153, "grad_norm": 8.417937278747559, "learning_rate": 1.4333524578987748e-05, "loss": 0.4323, "step": 892 }, { "epoch": 0.4222222222222222, "grad_norm": 5.9651007652282715, "learning_rate": 1.4318640696580834e-05, "loss": 0.3207, "step": 893 }, { "epoch": 0.4226950354609929, "grad_norm": 4.948203086853027, "learning_rate": 1.4303745044372293e-05, "loss": 0.2782, "step": 894 }, { "epoch": 0.4231678486997636, "grad_norm": 9.178805351257324, "learning_rate": 1.4288837662957969e-05, "loss": 0.334, "step": 895 }, { "epoch": 0.4236406619385343, "grad_norm": 7.537435054779053, "learning_rate": 1.4273918592965674e-05, "loss": 0.3307, "step": 896 }, { "epoch": 0.42411347517730497, "grad_norm": 5.170799732208252, "learning_rate": 1.4258987875055077e-05, "loss": 0.2322, "step": 897 }, { "epoch": 0.42458628841607565, "grad_norm": 7.296963214874268, "learning_rate": 1.4244045549917587e-05, "loss": 0.292, "step": 898 }, { "epoch": 0.42505910165484634, "grad_norm": 5.456043720245361, "learning_rate": 1.422909165827625e-05, "loss": 0.2374, "step": 899 }, { "epoch": 0.425531914893617, "grad_norm": 4.878541946411133, "learning_rate": 1.421412624088564e-05, "loss": 0.299, "step": 900 }, { "epoch": 0.425531914893617, "eval_accuracy": 0.852549889135255, "eval_f1": 0.6825775656324582, "eval_loss": 0.31735506653785706, "eval_precision": 0.8614457831325302, "eval_recall": 0.5652173913043478, "eval_runtime": 48.299, "eval_samples_per_second": 5.714, "eval_steps_per_second": 0.186, "step": 900 }, { "epoch": 0.4260047281323877, "grad_norm": 5.303489685058594, "learning_rate": 1.419914933853173e-05, "loss": 0.2548, "step": 901 }, { "epoch": 0.4264775413711584, "grad_norm": 5.416555404663086, "learning_rate": 1.4184160992031806e-05, "loss": 0.249, "step": 902 }, { "epoch": 0.4269503546099291, "grad_norm": 5.52853536605835, "learning_rate": 1.4169161242234335e-05, "loss": 0.2135, "step": 903 }, { "epoch": 0.42742316784869977, "grad_norm": 5.232771396636963, "learning_rate": 1.4154150130018867e-05, "loss": 0.2314, "step": 904 }, { "epoch": 0.42789598108747046, "grad_norm": 5.249035835266113, "learning_rate": 1.4139127696295913e-05, "loss": 0.188, "step": 905 }, { "epoch": 0.42836879432624114, "grad_norm": 8.240036010742188, "learning_rate": 1.4124093982006846e-05, "loss": 0.2678, "step": 906 }, { "epoch": 0.42884160756501183, "grad_norm": 5.175498008728027, "learning_rate": 1.410904902812378e-05, "loss": 0.2565, "step": 907 }, { "epoch": 0.4293144208037825, "grad_norm": 3.9959726333618164, "learning_rate": 1.4093992875649456e-05, "loss": 0.2413, "step": 908 }, { "epoch": 0.4297872340425532, "grad_norm": 3.8025238513946533, "learning_rate": 1.407892556561714e-05, "loss": 0.1705, "step": 909 }, { "epoch": 0.4302600472813239, "grad_norm": 5.208123683929443, "learning_rate": 1.4063847139090507e-05, "loss": 0.2492, "step": 910 }, { "epoch": 0.4307328605200946, "grad_norm": 4.154348850250244, "learning_rate": 1.4048757637163529e-05, "loss": 0.2182, "step": 911 }, { "epoch": 0.43120567375886526, "grad_norm": 5.2830939292907715, "learning_rate": 1.4033657100960356e-05, "loss": 0.2097, "step": 912 }, { "epoch": 0.43167848699763595, "grad_norm": 3.8644347190856934, "learning_rate": 1.4018545571635209e-05, "loss": 0.214, "step": 913 }, { "epoch": 0.43215130023640663, "grad_norm": 4.06352424621582, "learning_rate": 1.4003423090372286e-05, "loss": 0.2284, "step": 914 }, { "epoch": 0.4326241134751773, "grad_norm": 6.2407355308532715, "learning_rate": 1.3988289698385608e-05, "loss": 0.2216, "step": 915 }, { "epoch": 0.433096926713948, "grad_norm": 6.083385467529297, "learning_rate": 1.3973145436918957e-05, "loss": 0.268, "step": 916 }, { "epoch": 0.4335697399527187, "grad_norm": 7.127196311950684, "learning_rate": 1.3957990347245717e-05, "loss": 0.3019, "step": 917 }, { "epoch": 0.4340425531914894, "grad_norm": 4.245884418487549, "learning_rate": 1.3942824470668796e-05, "loss": 0.2615, "step": 918 }, { "epoch": 0.43451536643026006, "grad_norm": 6.33418083190918, "learning_rate": 1.3927647848520493e-05, "loss": 0.2592, "step": 919 }, { "epoch": 0.43498817966903075, "grad_norm": 6.671105861663818, "learning_rate": 1.3912460522162396e-05, "loss": 0.199, "step": 920 }, { "epoch": 0.43498817966903075, "eval_accuracy": 0.8392461197339246, "eval_f1": 0.6253229974160207, "eval_loss": 0.33865952491760254, "eval_precision": 0.9029850746268657, "eval_recall": 0.4782608695652174, "eval_runtime": 48.0155, "eval_samples_per_second": 5.748, "eval_steps_per_second": 0.187, "step": 920 }, { "epoch": 0.43546099290780144, "grad_norm": 4.163972854614258, "learning_rate": 1.3897262532985263e-05, "loss": 0.184, "step": 921 }, { "epoch": 0.4359338061465721, "grad_norm": 8.20583438873291, "learning_rate": 1.3882053922408915e-05, "loss": 0.288, "step": 922 }, { "epoch": 0.4364066193853428, "grad_norm": 5.573141098022461, "learning_rate": 1.3866834731882117e-05, "loss": 0.1807, "step": 923 }, { "epoch": 0.4368794326241135, "grad_norm": 9.644611358642578, "learning_rate": 1.3851605002882472e-05, "loss": 0.4276, "step": 924 }, { "epoch": 0.4373522458628842, "grad_norm": 7.489835739135742, "learning_rate": 1.38363647769163e-05, "loss": 0.3496, "step": 925 }, { "epoch": 0.43782505910165487, "grad_norm": 4.446575164794922, "learning_rate": 1.3821114095518529e-05, "loss": 0.1963, "step": 926 }, { "epoch": 0.43829787234042555, "grad_norm": 4.232187271118164, "learning_rate": 1.3805853000252584e-05, "loss": 0.2081, "step": 927 }, { "epoch": 0.43877068557919624, "grad_norm": 5.939121246337891, "learning_rate": 1.379058153271027e-05, "loss": 0.2361, "step": 928 }, { "epoch": 0.4392434988179669, "grad_norm": 8.863687515258789, "learning_rate": 1.3775299734511663e-05, "loss": 0.341, "step": 929 }, { "epoch": 0.4397163120567376, "grad_norm": 6.206582546234131, "learning_rate": 1.3760007647304987e-05, "loss": 0.23, "step": 930 }, { "epoch": 0.4401891252955083, "grad_norm": 7.478794574737549, "learning_rate": 1.3744705312766517e-05, "loss": 0.241, "step": 931 }, { "epoch": 0.440661938534279, "grad_norm": 9.208320617675781, "learning_rate": 1.3729392772600445e-05, "loss": 0.2495, "step": 932 }, { "epoch": 0.44113475177304967, "grad_norm": 5.460510730743408, "learning_rate": 1.3714070068538785e-05, "loss": 0.1938, "step": 933 }, { "epoch": 0.44160756501182036, "grad_norm": 6.056775093078613, "learning_rate": 1.3698737242341245e-05, "loss": 0.3128, "step": 934 }, { "epoch": 0.44208037825059104, "grad_norm": 6.535298824310303, "learning_rate": 1.3683394335795126e-05, "loss": 0.2466, "step": 935 }, { "epoch": 0.4425531914893617, "grad_norm": 6.023354530334473, "learning_rate": 1.3668041390715195e-05, "loss": 0.2496, "step": 936 }, { "epoch": 0.44302600472813236, "grad_norm": 5.573044300079346, "learning_rate": 1.365267844894358e-05, "loss": 0.2324, "step": 937 }, { "epoch": 0.44349881796690305, "grad_norm": 7.519514560699463, "learning_rate": 1.3637305552349656e-05, "loss": 0.3045, "step": 938 }, { "epoch": 0.44397163120567373, "grad_norm": 3.165480852127075, "learning_rate": 1.3621922742829923e-05, "loss": 0.1767, "step": 939 }, { "epoch": 0.4444444444444444, "grad_norm": 5.499924659729004, "learning_rate": 1.3606530062307902e-05, "loss": 0.2886, "step": 940 }, { "epoch": 0.4444444444444444, "eval_accuracy": 0.8381374722838137, "eval_f1": 0.6294416243654822, "eval_loss": 0.33129268884658813, "eval_precision": 0.8794326241134752, "eval_recall": 0.4901185770750988, "eval_runtime": 48.7368, "eval_samples_per_second": 5.663, "eval_steps_per_second": 0.185, "step": 940 }, { "epoch": 0.4449172576832151, "grad_norm": 4.935895919799805, "learning_rate": 1.3591127552734018e-05, "loss": 0.1841, "step": 941 }, { "epoch": 0.4453900709219858, "grad_norm": 5.519673824310303, "learning_rate": 1.3575715256085474e-05, "loss": 0.2634, "step": 942 }, { "epoch": 0.4458628841607565, "grad_norm": 5.731892108917236, "learning_rate": 1.3560293214366152e-05, "loss": 0.2458, "step": 943 }, { "epoch": 0.44633569739952716, "grad_norm": 6.215859413146973, "learning_rate": 1.3544861469606495e-05, "loss": 0.3181, "step": 944 }, { "epoch": 0.44680851063829785, "grad_norm": 5.27556848526001, "learning_rate": 1.352942006386339e-05, "loss": 0.2382, "step": 945 }, { "epoch": 0.44728132387706854, "grad_norm": 5.7444963455200195, "learning_rate": 1.351396903922005e-05, "loss": 0.218, "step": 946 }, { "epoch": 0.4477541371158392, "grad_norm": 6.035450458526611, "learning_rate": 1.3498508437785897e-05, "loss": 0.239, "step": 947 }, { "epoch": 0.4482269503546099, "grad_norm": 3.9959163665771484, "learning_rate": 1.3483038301696473e-05, "loss": 0.1595, "step": 948 }, { "epoch": 0.4486997635933806, "grad_norm": 3.841958999633789, "learning_rate": 1.3467558673113286e-05, "loss": 0.1573, "step": 949 }, { "epoch": 0.4491725768321513, "grad_norm": 4.796578884124756, "learning_rate": 1.345206959422372e-05, "loss": 0.2151, "step": 950 }, { "epoch": 0.44964539007092197, "grad_norm": 6.64060640335083, "learning_rate": 1.3436571107240919e-05, "loss": 0.2468, "step": 951 }, { "epoch": 0.45011820330969265, "grad_norm": 4.541578769683838, "learning_rate": 1.3421063254403657e-05, "loss": 0.2113, "step": 952 }, { "epoch": 0.45059101654846334, "grad_norm": 6.632504940032959, "learning_rate": 1.3405546077976249e-05, "loss": 0.2745, "step": 953 }, { "epoch": 0.451063829787234, "grad_norm": 4.1523284912109375, "learning_rate": 1.3390019620248403e-05, "loss": 0.1837, "step": 954 }, { "epoch": 0.4515366430260047, "grad_norm": 5.9142351150512695, "learning_rate": 1.3374483923535136e-05, "loss": 0.2732, "step": 955 }, { "epoch": 0.4520094562647754, "grad_norm": 4.184595584869385, "learning_rate": 1.335893903017663e-05, "loss": 0.2433, "step": 956 }, { "epoch": 0.4524822695035461, "grad_norm": 5.808665752410889, "learning_rate": 1.334338498253815e-05, "loss": 0.2497, "step": 957 }, { "epoch": 0.4529550827423168, "grad_norm": 4.390594959259033, "learning_rate": 1.332782182300989e-05, "loss": 0.2517, "step": 958 }, { "epoch": 0.45342789598108746, "grad_norm": 4.8440165519714355, "learning_rate": 1.3312249594006893e-05, "loss": 0.2194, "step": 959 }, { "epoch": 0.45390070921985815, "grad_norm": 7.286986827850342, "learning_rate": 1.3296668337968904e-05, "loss": 0.2641, "step": 960 }, { "epoch": 0.45390070921985815, "eval_accuracy": 0.8636363636363636, "eval_f1": 0.7159353348729792, "eval_loss": 0.3095405697822571, "eval_precision": 0.8611111111111112, "eval_recall": 0.6126482213438735, "eval_runtime": 48.7277, "eval_samples_per_second": 5.664, "eval_steps_per_second": 0.185, "step": 960 }, { "epoch": 0.45437352245862883, "grad_norm": 5.716742992401123, "learning_rate": 1.3281078097360287e-05, "loss": 0.1991, "step": 961 }, { "epoch": 0.4548463356973995, "grad_norm": 6.432254791259766, "learning_rate": 1.3265478914669878e-05, "loss": 0.2438, "step": 962 }, { "epoch": 0.4553191489361702, "grad_norm": 6.0999250411987305, "learning_rate": 1.3249870832410886e-05, "loss": 0.2145, "step": 963 }, { "epoch": 0.4557919621749409, "grad_norm": 6.828171730041504, "learning_rate": 1.323425389312079e-05, "loss": 0.2465, "step": 964 }, { "epoch": 0.4562647754137116, "grad_norm": 5.544858455657959, "learning_rate": 1.3218628139361178e-05, "loss": 0.1922, "step": 965 }, { "epoch": 0.45673758865248226, "grad_norm": 5.448679447174072, "learning_rate": 1.3202993613717688e-05, "loss": 0.2383, "step": 966 }, { "epoch": 0.45721040189125295, "grad_norm": 4.943000793457031, "learning_rate": 1.3187350358799846e-05, "loss": 0.2142, "step": 967 }, { "epoch": 0.45768321513002364, "grad_norm": 4.455641746520996, "learning_rate": 1.3171698417240984e-05, "loss": 0.2669, "step": 968 }, { "epoch": 0.4581560283687943, "grad_norm": 6.673210144042969, "learning_rate": 1.3156037831698094e-05, "loss": 0.2913, "step": 969 }, { "epoch": 0.458628841607565, "grad_norm": 4.279630661010742, "learning_rate": 1.3140368644851735e-05, "loss": 0.1963, "step": 970 }, { "epoch": 0.4591016548463357, "grad_norm": 6.872097969055176, "learning_rate": 1.3124690899405903e-05, "loss": 0.3115, "step": 971 }, { "epoch": 0.4595744680851064, "grad_norm": 8.577292442321777, "learning_rate": 1.3109004638087919e-05, "loss": 0.2241, "step": 972 }, { "epoch": 0.46004728132387707, "grad_norm": 6.13325834274292, "learning_rate": 1.3093309903648316e-05, "loss": 0.2008, "step": 973 }, { "epoch": 0.46052009456264775, "grad_norm": 7.323633193969727, "learning_rate": 1.3077606738860719e-05, "loss": 0.2284, "step": 974 }, { "epoch": 0.46099290780141844, "grad_norm": 5.5575642585754395, "learning_rate": 1.3061895186521724e-05, "loss": 0.2657, "step": 975 }, { "epoch": 0.4614657210401891, "grad_norm": 6.12337064743042, "learning_rate": 1.304617528945079e-05, "loss": 0.2165, "step": 976 }, { "epoch": 0.4619385342789598, "grad_norm": 6.96366024017334, "learning_rate": 1.3030447090490117e-05, "loss": 0.321, "step": 977 }, { "epoch": 0.4624113475177305, "grad_norm": 6.608788967132568, "learning_rate": 1.3014710632504533e-05, "loss": 0.2851, "step": 978 }, { "epoch": 0.4628841607565012, "grad_norm": 7.216396808624268, "learning_rate": 1.299896595838137e-05, "loss": 0.3212, "step": 979 }, { "epoch": 0.46335697399527187, "grad_norm": 5.307373523712158, "learning_rate": 1.2983213111030355e-05, "loss": 0.2316, "step": 980 }, { "epoch": 0.46335697399527187, "eval_accuracy": 0.8603104212860311, "eval_f1": 0.71875, "eval_loss": 0.3029595613479614, "eval_precision": 0.8256410256410256, "eval_recall": 0.6363636363636364, "eval_runtime": 48.9893, "eval_samples_per_second": 5.634, "eval_steps_per_second": 0.184, "step": 980 }, { "epoch": 0.46382978723404256, "grad_norm": 7.238687992095947, "learning_rate": 1.2967452133383494e-05, "loss": 0.3382, "step": 981 }, { "epoch": 0.46430260047281324, "grad_norm": 5.481350421905518, "learning_rate": 1.2951683068394941e-05, "loss": 0.2074, "step": 982 }, { "epoch": 0.46477541371158393, "grad_norm": 6.286655426025391, "learning_rate": 1.2935905959040898e-05, "loss": 0.2536, "step": 983 }, { "epoch": 0.4652482269503546, "grad_norm": 7.73200798034668, "learning_rate": 1.2920120848319483e-05, "loss": 0.2815, "step": 984 }, { "epoch": 0.4657210401891253, "grad_norm": 5.538710117340088, "learning_rate": 1.2904327779250638e-05, "loss": 0.2503, "step": 985 }, { "epoch": 0.466193853427896, "grad_norm": 8.157992362976074, "learning_rate": 1.2888526794875975e-05, "loss": 0.2675, "step": 986 }, { "epoch": 0.4666666666666667, "grad_norm": 4.824194431304932, "learning_rate": 1.2872717938258688e-05, "loss": 0.2185, "step": 987 }, { "epoch": 0.46713947990543736, "grad_norm": 3.831620931625366, "learning_rate": 1.285690125248342e-05, "loss": 0.2046, "step": 988 }, { "epoch": 0.46761229314420805, "grad_norm": 5.231266498565674, "learning_rate": 1.2841076780656155e-05, "loss": 0.2472, "step": 989 }, { "epoch": 0.46808510638297873, "grad_norm": 6.9529194831848145, "learning_rate": 1.28252445659041e-05, "loss": 0.2855, "step": 990 }, { "epoch": 0.4685579196217494, "grad_norm": 6.860682964324951, "learning_rate": 1.2809404651375554e-05, "loss": 0.2526, "step": 991 }, { "epoch": 0.4690307328605201, "grad_norm": 6.531607627868652, "learning_rate": 1.2793557080239819e-05, "loss": 0.266, "step": 992 }, { "epoch": 0.4695035460992908, "grad_norm": 4.6222758293151855, "learning_rate": 1.2777701895687034e-05, "loss": 0.2346, "step": 993 }, { "epoch": 0.4699763593380615, "grad_norm": 5.676296710968018, "learning_rate": 1.2761839140928119e-05, "loss": 0.3332, "step": 994 }, { "epoch": 0.47044917257683216, "grad_norm": 8.922492027282715, "learning_rate": 1.2745968859194604e-05, "loss": 0.2986, "step": 995 }, { "epoch": 0.47092198581560285, "grad_norm": 3.270632266998291, "learning_rate": 1.2730091093738545e-05, "loss": 0.122, "step": 996 }, { "epoch": 0.47139479905437354, "grad_norm": 4.889394283294678, "learning_rate": 1.2714205887832388e-05, "loss": 0.2348, "step": 997 }, { "epoch": 0.4718676122931442, "grad_norm": 6.802956581115723, "learning_rate": 1.2698313284768852e-05, "loss": 0.2074, "step": 998 }, { "epoch": 0.4723404255319149, "grad_norm": 5.15386962890625, "learning_rate": 1.2682413327860827e-05, "loss": 0.2129, "step": 999 }, { "epoch": 0.4728132387706856, "grad_norm": 4.577718257904053, "learning_rate": 1.2666506060441237e-05, "loss": 0.2116, "step": 1000 }, { "epoch": 0.4728132387706856, "eval_accuracy": 0.8580931263858093, "eval_f1": 0.7009345794392523, "eval_loss": 0.3230363726615906, "eval_precision": 0.8571428571428571, "eval_recall": 0.5928853754940712, "eval_runtime": 48.9793, "eval_samples_per_second": 5.635, "eval_steps_per_second": 0.184, "step": 1000 }, { "epoch": 0.4732860520094563, "grad_norm": 6.338871479034424, "learning_rate": 1.2650591525862934e-05, "loss": 0.2665, "step": 1001 }, { "epoch": 0.47375886524822697, "grad_norm": 6.019141674041748, "learning_rate": 1.2634669767498573e-05, "loss": 0.2079, "step": 1002 }, { "epoch": 0.47423167848699765, "grad_norm": 4.347167015075684, "learning_rate": 1.2618740828740494e-05, "loss": 0.1908, "step": 1003 }, { "epoch": 0.47470449172576834, "grad_norm": 6.210932731628418, "learning_rate": 1.2602804753000611e-05, "loss": 0.1847, "step": 1004 }, { "epoch": 0.475177304964539, "grad_norm": 5.755384922027588, "learning_rate": 1.2586861583710289e-05, "loss": 0.2592, "step": 1005 }, { "epoch": 0.4756501182033097, "grad_norm": 7.43326473236084, "learning_rate": 1.2570911364320218e-05, "loss": 0.2216, "step": 1006 }, { "epoch": 0.4761229314420804, "grad_norm": 6.543978214263916, "learning_rate": 1.2554954138300307e-05, "loss": 0.2118, "step": 1007 }, { "epoch": 0.4765957446808511, "grad_norm": 4.375254154205322, "learning_rate": 1.2538989949139567e-05, "loss": 0.1908, "step": 1008 }, { "epoch": 0.47706855791962177, "grad_norm": 6.078047275543213, "learning_rate": 1.2523018840345972e-05, "loss": 0.2619, "step": 1009 }, { "epoch": 0.47754137115839246, "grad_norm": 4.737030506134033, "learning_rate": 1.2507040855446371e-05, "loss": 0.1731, "step": 1010 }, { "epoch": 0.47801418439716314, "grad_norm": 5.818294525146484, "learning_rate": 1.2491056037986334e-05, "loss": 0.2438, "step": 1011 }, { "epoch": 0.47848699763593383, "grad_norm": 6.881172180175781, "learning_rate": 1.2475064431530066e-05, "loss": 0.2313, "step": 1012 }, { "epoch": 0.4789598108747045, "grad_norm": 5.162444591522217, "learning_rate": 1.245906607966027e-05, "loss": 0.2579, "step": 1013 }, { "epoch": 0.4794326241134752, "grad_norm": 8.821483612060547, "learning_rate": 1.2443061025978034e-05, "loss": 0.3318, "step": 1014 }, { "epoch": 0.4799054373522459, "grad_norm": 7.8657684326171875, "learning_rate": 1.2427049314102708e-05, "loss": 0.2404, "step": 1015 }, { "epoch": 0.4803782505910166, "grad_norm": 5.313066482543945, "learning_rate": 1.2411030987671791e-05, "loss": 0.1851, "step": 1016 }, { "epoch": 0.4808510638297872, "grad_norm": 6.415999412536621, "learning_rate": 1.2395006090340804e-05, "loss": 0.2219, "step": 1017 }, { "epoch": 0.4813238770685579, "grad_norm": 6.840671539306641, "learning_rate": 1.2378974665783184e-05, "loss": 0.3221, "step": 1018 }, { "epoch": 0.4817966903073286, "grad_norm": 7.59630823135376, "learning_rate": 1.236293675769015e-05, "loss": 0.3566, "step": 1019 }, { "epoch": 0.48226950354609927, "grad_norm": 5.026065349578857, "learning_rate": 1.2346892409770594e-05, "loss": 0.2134, "step": 1020 }, { "epoch": 0.48226950354609927, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7327586206896551, "eval_loss": 0.3039931058883667, "eval_precision": 0.8056872037914692, "eval_recall": 0.6719367588932806, "eval_runtime": 49.0789, "eval_samples_per_second": 5.624, "eval_steps_per_second": 0.183, "step": 1020 }, { "epoch": 0.48274231678486995, "grad_norm": 5.331277370452881, "learning_rate": 1.2330841665750954e-05, "loss": 0.2262, "step": 1021 }, { "epoch": 0.48321513002364064, "grad_norm": 8.84730052947998, "learning_rate": 1.2314784569375114e-05, "loss": 0.3046, "step": 1022 }, { "epoch": 0.4836879432624113, "grad_norm": 6.711941719055176, "learning_rate": 1.2298721164404249e-05, "loss": 0.3397, "step": 1023 }, { "epoch": 0.484160756501182, "grad_norm": 5.642043113708496, "learning_rate": 1.2282651494616742e-05, "loss": 0.2586, "step": 1024 }, { "epoch": 0.4846335697399527, "grad_norm": 5.215063571929932, "learning_rate": 1.226657560380805e-05, "loss": 0.2402, "step": 1025 }, { "epoch": 0.4851063829787234, "grad_norm": 7.698766231536865, "learning_rate": 1.2250493535790574e-05, "loss": 0.304, "step": 1026 }, { "epoch": 0.48557919621749407, "grad_norm": 5.472048282623291, "learning_rate": 1.223440533439356e-05, "loss": 0.229, "step": 1027 }, { "epoch": 0.48605200945626476, "grad_norm": 4.721035480499268, "learning_rate": 1.2218311043462964e-05, "loss": 0.1878, "step": 1028 }, { "epoch": 0.48652482269503544, "grad_norm": 5.494324207305908, "learning_rate": 1.2202210706861346e-05, "loss": 0.2146, "step": 1029 }, { "epoch": 0.48699763593380613, "grad_norm": 5.183449745178223, "learning_rate": 1.218610436846773e-05, "loss": 0.2175, "step": 1030 }, { "epoch": 0.4874704491725768, "grad_norm": 8.610817909240723, "learning_rate": 1.216999207217751e-05, "loss": 0.3331, "step": 1031 }, { "epoch": 0.4879432624113475, "grad_norm": 6.789135456085205, "learning_rate": 1.21538738619023e-05, "loss": 0.3608, "step": 1032 }, { "epoch": 0.4884160756501182, "grad_norm": 3.8762876987457275, "learning_rate": 1.2137749781569857e-05, "loss": 0.2002, "step": 1033 }, { "epoch": 0.4888888888888889, "grad_norm": 5.960103511810303, "learning_rate": 1.2121619875123914e-05, "loss": 0.2497, "step": 1034 }, { "epoch": 0.48936170212765956, "grad_norm": 7.955074787139893, "learning_rate": 1.2105484186524088e-05, "loss": 0.3593, "step": 1035 }, { "epoch": 0.48983451536643025, "grad_norm": 4.501315116882324, "learning_rate": 1.2089342759745761e-05, "loss": 0.2412, "step": 1036 }, { "epoch": 0.49030732860520093, "grad_norm": 4.577963352203369, "learning_rate": 1.2073195638779944e-05, "loss": 0.2328, "step": 1037 }, { "epoch": 0.4907801418439716, "grad_norm": 18.703994750976562, "learning_rate": 1.2057042867633178e-05, "loss": 0.2931, "step": 1038 }, { "epoch": 0.4912529550827423, "grad_norm": 4.856638431549072, "learning_rate": 1.2040884490327391e-05, "loss": 0.2607, "step": 1039 }, { "epoch": 0.491725768321513, "grad_norm": 5.463403701782227, "learning_rate": 1.2024720550899798e-05, "loss": 0.2139, "step": 1040 }, { "epoch": 0.491725768321513, "eval_accuracy": 0.844789356984479, "eval_f1": 0.6446700507614214, "eval_loss": 0.3279857337474823, "eval_precision": 0.900709219858156, "eval_recall": 0.5019762845849802, "eval_runtime": 48.0379, "eval_samples_per_second": 5.745, "eval_steps_per_second": 0.187, "step": 1040 }, { "epoch": 0.4921985815602837, "grad_norm": 6.326286315917969, "learning_rate": 1.2008551093402763e-05, "loss": 0.264, "step": 1041 }, { "epoch": 0.49267139479905436, "grad_norm": 5.787569046020508, "learning_rate": 1.1992376161903705e-05, "loss": 0.228, "step": 1042 }, { "epoch": 0.49314420803782505, "grad_norm": 6.124124526977539, "learning_rate": 1.1976195800484945e-05, "loss": 0.1668, "step": 1043 }, { "epoch": 0.49361702127659574, "grad_norm": 5.056814670562744, "learning_rate": 1.1960010053243613e-05, "loss": 0.1894, "step": 1044 }, { "epoch": 0.4940898345153664, "grad_norm": 7.828837871551514, "learning_rate": 1.194381896429151e-05, "loss": 0.3602, "step": 1045 }, { "epoch": 0.4945626477541371, "grad_norm": 4.001469135284424, "learning_rate": 1.1927622577755003e-05, "loss": 0.1379, "step": 1046 }, { "epoch": 0.4950354609929078, "grad_norm": 7.635477542877197, "learning_rate": 1.191142093777489e-05, "loss": 0.244, "step": 1047 }, { "epoch": 0.4955082742316785, "grad_norm": 7.2881364822387695, "learning_rate": 1.1895214088506284e-05, "loss": 0.3006, "step": 1048 }, { "epoch": 0.49598108747044917, "grad_norm": 5.0428619384765625, "learning_rate": 1.1879002074118512e-05, "loss": 0.2994, "step": 1049 }, { "epoch": 0.49645390070921985, "grad_norm": 5.892991542816162, "learning_rate": 1.1862784938794951e-05, "loss": 0.229, "step": 1050 }, { "epoch": 0.49692671394799054, "grad_norm": 6.7257304191589355, "learning_rate": 1.184656272673296e-05, "loss": 0.3032, "step": 1051 }, { "epoch": 0.4973995271867612, "grad_norm": 6.495220184326172, "learning_rate": 1.1830335482143718e-05, "loss": 0.2918, "step": 1052 }, { "epoch": 0.4978723404255319, "grad_norm": 4.424355983734131, "learning_rate": 1.1814103249252124e-05, "loss": 0.2097, "step": 1053 }, { "epoch": 0.4983451536643026, "grad_norm": 4.175996780395508, "learning_rate": 1.1797866072296676e-05, "loss": 0.1882, "step": 1054 }, { "epoch": 0.4988179669030733, "grad_norm": 5.1931328773498535, "learning_rate": 1.1781623995529341e-05, "loss": 0.2526, "step": 1055 }, { "epoch": 0.49929078014184397, "grad_norm": 5.245265960693359, "learning_rate": 1.1765377063215436e-05, "loss": 0.204, "step": 1056 }, { "epoch": 0.49976359338061466, "grad_norm": 4.931206226348877, "learning_rate": 1.1749125319633523e-05, "loss": 0.2239, "step": 1057 }, { "epoch": 0.5002364066193853, "grad_norm": 4.764687538146973, "learning_rate": 1.1732868809075266e-05, "loss": 0.2257, "step": 1058 }, { "epoch": 0.500709219858156, "grad_norm": 6.108907222747803, "learning_rate": 1.1716607575845327e-05, "loss": 0.271, "step": 1059 }, { "epoch": 0.5011820330969267, "grad_norm": 5.150505065917969, "learning_rate": 1.1700341664261233e-05, "loss": 0.1949, "step": 1060 }, { "epoch": 0.5011820330969267, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7089201877934272, "eval_loss": 0.31160375475883484, "eval_precision": 0.8728323699421965, "eval_recall": 0.5968379446640316, "eval_runtime": 48.5889, "eval_samples_per_second": 5.68, "eval_steps_per_second": 0.185, "step": 1060 }, { "epoch": 0.5016548463356973, "grad_norm": 5.739040851593018, "learning_rate": 1.1684071118653262e-05, "loss": 0.238, "step": 1061 }, { "epoch": 0.502127659574468, "grad_norm": 4.2889862060546875, "learning_rate": 1.1667795983364332e-05, "loss": 0.1881, "step": 1062 }, { "epoch": 0.5026004728132387, "grad_norm": 6.57220458984375, "learning_rate": 1.1651516302749854e-05, "loss": 0.3294, "step": 1063 }, { "epoch": 0.5030732860520094, "grad_norm": 6.348330497741699, "learning_rate": 1.1635232121177637e-05, "loss": 0.2125, "step": 1064 }, { "epoch": 0.5035460992907801, "grad_norm": 4.948648452758789, "learning_rate": 1.1618943483027749e-05, "loss": 0.2409, "step": 1065 }, { "epoch": 0.5040189125295508, "grad_norm": 5.066147804260254, "learning_rate": 1.1602650432692417e-05, "loss": 0.317, "step": 1066 }, { "epoch": 0.5044917257683215, "grad_norm": 5.450560092926025, "learning_rate": 1.1586353014575875e-05, "loss": 0.2262, "step": 1067 }, { "epoch": 0.5049645390070922, "grad_norm": 4.708855152130127, "learning_rate": 1.1570051273094277e-05, "loss": 0.2362, "step": 1068 }, { "epoch": 0.5054373522458628, "grad_norm": 10.365764617919922, "learning_rate": 1.1553745252675541e-05, "loss": 0.3124, "step": 1069 }, { "epoch": 0.5059101654846335, "grad_norm": 3.386537790298462, "learning_rate": 1.153743499775927e-05, "loss": 0.121, "step": 1070 }, { "epoch": 0.5063829787234042, "grad_norm": 6.828433990478516, "learning_rate": 1.152112055279659e-05, "loss": 0.2726, "step": 1071 }, { "epoch": 0.5068557919621749, "grad_norm": 5.744606971740723, "learning_rate": 1.1504801962250055e-05, "loss": 0.2195, "step": 1072 }, { "epoch": 0.5073286052009456, "grad_norm": 4.951056480407715, "learning_rate": 1.1488479270593507e-05, "loss": 0.2528, "step": 1073 }, { "epoch": 0.5078014184397163, "grad_norm": 5.981780529022217, "learning_rate": 1.1472152522311974e-05, "loss": 0.2478, "step": 1074 }, { "epoch": 0.508274231678487, "grad_norm": 3.798823118209839, "learning_rate": 1.1455821761901544e-05, "loss": 0.2075, "step": 1075 }, { "epoch": 0.5087470449172576, "grad_norm": 6.254341125488281, "learning_rate": 1.1439487033869226e-05, "loss": 0.301, "step": 1076 }, { "epoch": 0.5092198581560283, "grad_norm": 6.011257648468018, "learning_rate": 1.1423148382732854e-05, "loss": 0.257, "step": 1077 }, { "epoch": 0.509692671394799, "grad_norm": 5.492880344390869, "learning_rate": 1.1406805853020944e-05, "loss": 0.2582, "step": 1078 }, { "epoch": 0.5101654846335697, "grad_norm": 4.94982385635376, "learning_rate": 1.139045948927259e-05, "loss": 0.2408, "step": 1079 }, { "epoch": 0.5106382978723404, "grad_norm": 4.847739219665527, "learning_rate": 1.1374109336037331e-05, "loss": 0.2255, "step": 1080 }, { "epoch": 0.5106382978723404, "eval_accuracy": 0.8592017738359202, "eval_f1": 0.7011764705882353, "eval_loss": 0.31947237253189087, "eval_precision": 0.8662790697674418, "eval_recall": 0.5889328063241107, "eval_runtime": 48.5383, "eval_samples_per_second": 5.686, "eval_steps_per_second": 0.185, "step": 1080 }, { "epoch": 0.5111111111111111, "grad_norm": 5.068375587463379, "learning_rate": 1.135775543787504e-05, "loss": 0.2263, "step": 1081 }, { "epoch": 0.5115839243498818, "grad_norm": 4.222960472106934, "learning_rate": 1.1341397839355786e-05, "loss": 0.2395, "step": 1082 }, { "epoch": 0.5120567375886524, "grad_norm": 4.976379871368408, "learning_rate": 1.1325036585059732e-05, "loss": 0.2215, "step": 1083 }, { "epoch": 0.5125295508274231, "grad_norm": 5.738669395446777, "learning_rate": 1.1308671719576997e-05, "loss": 0.22, "step": 1084 }, { "epoch": 0.5130023640661938, "grad_norm": 7.325643539428711, "learning_rate": 1.1292303287507546e-05, "loss": 0.2674, "step": 1085 }, { "epoch": 0.5134751773049645, "grad_norm": 7.6121320724487305, "learning_rate": 1.1275931333461065e-05, "loss": 0.3137, "step": 1086 }, { "epoch": 0.5139479905437352, "grad_norm": 4.593527793884277, "learning_rate": 1.1259555902056838e-05, "loss": 0.2287, "step": 1087 }, { "epoch": 0.5144208037825059, "grad_norm": 5.258584499359131, "learning_rate": 1.1243177037923623e-05, "loss": 0.2492, "step": 1088 }, { "epoch": 0.5148936170212766, "grad_norm": 6.010392189025879, "learning_rate": 1.1226794785699531e-05, "loss": 0.2251, "step": 1089 }, { "epoch": 0.5153664302600472, "grad_norm": 7.567986488342285, "learning_rate": 1.121040919003192e-05, "loss": 0.3043, "step": 1090 }, { "epoch": 0.5158392434988179, "grad_norm": 5.762569427490234, "learning_rate": 1.1194020295577246e-05, "loss": 0.2001, "step": 1091 }, { "epoch": 0.5163120567375886, "grad_norm": 8.211880683898926, "learning_rate": 1.1177628147000961e-05, "loss": 0.2575, "step": 1092 }, { "epoch": 0.5167848699763593, "grad_norm": 4.067295074462891, "learning_rate": 1.1161232788977385e-05, "loss": 0.1754, "step": 1093 }, { "epoch": 0.51725768321513, "grad_norm": 5.805228233337402, "learning_rate": 1.1144834266189585e-05, "loss": 0.2111, "step": 1094 }, { "epoch": 0.5177304964539007, "grad_norm": 4.621476650238037, "learning_rate": 1.1128432623329256e-05, "loss": 0.1363, "step": 1095 }, { "epoch": 0.5182033096926714, "grad_norm": 7.750375747680664, "learning_rate": 1.111202790509659e-05, "loss": 0.2732, "step": 1096 }, { "epoch": 0.518676122931442, "grad_norm": 4.444814682006836, "learning_rate": 1.1095620156200166e-05, "loss": 0.2107, "step": 1097 }, { "epoch": 0.5191489361702127, "grad_norm": 12.939567565917969, "learning_rate": 1.1079209421356816e-05, "loss": 0.2695, "step": 1098 }, { "epoch": 0.5196217494089834, "grad_norm": 7.756330966949463, "learning_rate": 1.1062795745291519e-05, "loss": 0.2247, "step": 1099 }, { "epoch": 0.5200945626477541, "grad_norm": 6.402958393096924, "learning_rate": 1.1046379172737264e-05, "loss": 0.2452, "step": 1100 }, { "epoch": 0.5200945626477541, "eval_accuracy": 0.8425720620842572, "eval_f1": 0.6395939086294417, "eval_loss": 0.34635570645332336, "eval_precision": 0.8936170212765957, "eval_recall": 0.4980237154150198, "eval_runtime": 47.8847, "eval_samples_per_second": 5.764, "eval_steps_per_second": 0.188, "step": 1100 }, { "epoch": 0.5205673758865248, "grad_norm": 7.110340118408203, "learning_rate": 1.1029959748434935e-05, "loss": 0.2357, "step": 1101 }, { "epoch": 0.5210401891252955, "grad_norm": 6.949429512023926, "learning_rate": 1.1013537517133184e-05, "loss": 0.3259, "step": 1102 }, { "epoch": 0.5215130023640662, "grad_norm": 5.027368068695068, "learning_rate": 1.0997112523588322e-05, "loss": 0.1423, "step": 1103 }, { "epoch": 0.5219858156028369, "grad_norm": 6.545793056488037, "learning_rate": 1.0980684812564183e-05, "loss": 0.1863, "step": 1104 }, { "epoch": 0.5224586288416075, "grad_norm": 5.906529903411865, "learning_rate": 1.0964254428832007e-05, "loss": 0.2981, "step": 1105 }, { "epoch": 0.5229314420803782, "grad_norm": 4.966193675994873, "learning_rate": 1.0947821417170313e-05, "loss": 0.2378, "step": 1106 }, { "epoch": 0.5234042553191489, "grad_norm": 5.323748588562012, "learning_rate": 1.0931385822364796e-05, "loss": 0.2183, "step": 1107 }, { "epoch": 0.5238770685579196, "grad_norm": 7.892477035522461, "learning_rate": 1.0914947689208171e-05, "loss": 0.3732, "step": 1108 }, { "epoch": 0.5243498817966903, "grad_norm": 4.786356449127197, "learning_rate": 1.0898507062500095e-05, "loss": 0.2391, "step": 1109 }, { "epoch": 0.524822695035461, "grad_norm": 6.325803279876709, "learning_rate": 1.0882063987047e-05, "loss": 0.2397, "step": 1110 }, { "epoch": 0.5252955082742317, "grad_norm": 6.990598201751709, "learning_rate": 1.0865618507662001e-05, "loss": 0.2782, "step": 1111 }, { "epoch": 0.5257683215130023, "grad_norm": 8.039189338684082, "learning_rate": 1.0849170669164764e-05, "loss": 0.262, "step": 1112 }, { "epoch": 0.526241134751773, "grad_norm": 6.111503601074219, "learning_rate": 1.0832720516381382e-05, "loss": 0.2201, "step": 1113 }, { "epoch": 0.5267139479905437, "grad_norm": 6.7883124351501465, "learning_rate": 1.0816268094144257e-05, "loss": 0.2615, "step": 1114 }, { "epoch": 0.5271867612293144, "grad_norm": 6.257448673248291, "learning_rate": 1.0799813447291979e-05, "loss": 0.232, "step": 1115 }, { "epoch": 0.5276595744680851, "grad_norm": 8.060059547424316, "learning_rate": 1.0783356620669195e-05, "loss": 0.2726, "step": 1116 }, { "epoch": 0.5281323877068558, "grad_norm": 4.382721424102783, "learning_rate": 1.0766897659126491e-05, "loss": 0.2114, "step": 1117 }, { "epoch": 0.5286052009456265, "grad_norm": 5.4973859786987305, "learning_rate": 1.0750436607520287e-05, "loss": 0.2706, "step": 1118 }, { "epoch": 0.5290780141843971, "grad_norm": 8.051422119140625, "learning_rate": 1.0733973510712682e-05, "loss": 0.2354, "step": 1119 }, { "epoch": 0.5295508274231678, "grad_norm": 3.799506664276123, "learning_rate": 1.0717508413571349e-05, "loss": 0.2038, "step": 1120 }, { "epoch": 0.5295508274231678, "eval_accuracy": 0.8569844789356984, "eval_f1": 0.6921241050119332, "eval_loss": 0.31673863530158997, "eval_precision": 0.8734939759036144, "eval_recall": 0.5731225296442688, "eval_runtime": 48.009, "eval_samples_per_second": 5.749, "eval_steps_per_second": 0.187, "step": 1120 }, { "epoch": 0.5300236406619385, "grad_norm": 7.479004383087158, "learning_rate": 1.0701041360969428e-05, "loss": 0.2895, "step": 1121 }, { "epoch": 0.5304964539007092, "grad_norm": 4.519740104675293, "learning_rate": 1.068457239778537e-05, "loss": 0.2641, "step": 1122 }, { "epoch": 0.5309692671394799, "grad_norm": 5.979281425476074, "learning_rate": 1.0668101568902852e-05, "loss": 0.2297, "step": 1123 }, { "epoch": 0.5314420803782506, "grad_norm": 4.343296051025391, "learning_rate": 1.0651628919210615e-05, "loss": 0.1811, "step": 1124 }, { "epoch": 0.5319148936170213, "grad_norm": 5.795645713806152, "learning_rate": 1.063515449360238e-05, "loss": 0.2214, "step": 1125 }, { "epoch": 0.532387706855792, "grad_norm": 4.395986080169678, "learning_rate": 1.0618678336976695e-05, "loss": 0.2373, "step": 1126 }, { "epoch": 0.5328605200945626, "grad_norm": 4.035050392150879, "learning_rate": 1.0602200494236837e-05, "loss": 0.185, "step": 1127 }, { "epoch": 0.5333333333333333, "grad_norm": 5.2432780265808105, "learning_rate": 1.0585721010290668e-05, "loss": 0.201, "step": 1128 }, { "epoch": 0.533806146572104, "grad_norm": 6.4242777824401855, "learning_rate": 1.0569239930050532e-05, "loss": 0.2681, "step": 1129 }, { "epoch": 0.5342789598108747, "grad_norm": 6.040828227996826, "learning_rate": 1.0552757298433113e-05, "loss": 0.1799, "step": 1130 }, { "epoch": 0.5347517730496454, "grad_norm": 8.409934043884277, "learning_rate": 1.0536273160359335e-05, "loss": 0.3153, "step": 1131 }, { "epoch": 0.5352245862884161, "grad_norm": 5.49470853805542, "learning_rate": 1.0519787560754215e-05, "loss": 0.2344, "step": 1132 }, { "epoch": 0.5356973995271868, "grad_norm": 4.8799967765808105, "learning_rate": 1.050330054454677e-05, "loss": 0.1693, "step": 1133 }, { "epoch": 0.5361702127659574, "grad_norm": 7.70962381362915, "learning_rate": 1.0486812156669859e-05, "loss": 0.1999, "step": 1134 }, { "epoch": 0.5366430260047281, "grad_norm": 6.684405326843262, "learning_rate": 1.0470322442060089e-05, "loss": 0.1878, "step": 1135 }, { "epoch": 0.5371158392434988, "grad_norm": 7.799801826477051, "learning_rate": 1.045383144565768e-05, "loss": 0.2943, "step": 1136 }, { "epoch": 0.5375886524822695, "grad_norm": 9.22608470916748, "learning_rate": 1.043733921240635e-05, "loss": 0.3668, "step": 1137 }, { "epoch": 0.5380614657210402, "grad_norm": 5.817656517028809, "learning_rate": 1.0420845787253189e-05, "loss": 0.2449, "step": 1138 }, { "epoch": 0.5385342789598109, "grad_norm": 9.814664840698242, "learning_rate": 1.0404351215148523e-05, "loss": 0.3372, "step": 1139 }, { "epoch": 0.5390070921985816, "grad_norm": 4.843449592590332, "learning_rate": 1.0387855541045815e-05, "loss": 0.2496, "step": 1140 }, { "epoch": 0.5390070921985816, "eval_accuracy": 0.8592017738359202, "eval_f1": 0.6968973747016707, "eval_loss": 0.31810781359672546, "eval_precision": 0.8795180722891566, "eval_recall": 0.5770750988142292, "eval_runtime": 49.0428, "eval_samples_per_second": 5.628, "eval_steps_per_second": 0.184, "step": 1140 }, { "epoch": 0.5394799054373522, "grad_norm": 6.5451765060424805, "learning_rate": 1.0371358809901529e-05, "loss": 0.266, "step": 1141 }, { "epoch": 0.5399527186761229, "grad_norm": 4.096044540405273, "learning_rate": 1.0354861066675008e-05, "loss": 0.1938, "step": 1142 }, { "epoch": 0.5404255319148936, "grad_norm": 5.981978416442871, "learning_rate": 1.0338362356328355e-05, "loss": 0.2721, "step": 1143 }, { "epoch": 0.5408983451536643, "grad_norm": 4.459275245666504, "learning_rate": 1.0321862723826311e-05, "loss": 0.2085, "step": 1144 }, { "epoch": 0.541371158392435, "grad_norm": 4.302639484405518, "learning_rate": 1.0305362214136122e-05, "loss": 0.2267, "step": 1145 }, { "epoch": 0.5418439716312057, "grad_norm": 8.027523040771484, "learning_rate": 1.028886087222743e-05, "loss": 0.3361, "step": 1146 }, { "epoch": 0.5423167848699764, "grad_norm": 6.380166530609131, "learning_rate": 1.0272358743072152e-05, "loss": 0.2274, "step": 1147 }, { "epoch": 0.542789598108747, "grad_norm": 7.479015827178955, "learning_rate": 1.0255855871644338e-05, "loss": 0.3562, "step": 1148 }, { "epoch": 0.5432624113475177, "grad_norm": 4.3820295333862305, "learning_rate": 1.0239352302920067e-05, "loss": 0.1709, "step": 1149 }, { "epoch": 0.5437352245862884, "grad_norm": 6.630291938781738, "learning_rate": 1.0222848081877316e-05, "loss": 0.2615, "step": 1150 }, { "epoch": 0.5442080378250591, "grad_norm": 5.88150691986084, "learning_rate": 1.0206343253495848e-05, "loss": 0.2611, "step": 1151 }, { "epoch": 0.5446808510638298, "grad_norm": 6.246159553527832, "learning_rate": 1.0189837862757068e-05, "loss": 0.2713, "step": 1152 }, { "epoch": 0.5451536643026005, "grad_norm": 6.391038417816162, "learning_rate": 1.0173331954643926e-05, "loss": 0.1998, "step": 1153 }, { "epoch": 0.5456264775413712, "grad_norm": 5.693717002868652, "learning_rate": 1.0156825574140769e-05, "loss": 0.2219, "step": 1154 }, { "epoch": 0.5460992907801419, "grad_norm": 4.549108982086182, "learning_rate": 1.0140318766233247e-05, "loss": 0.239, "step": 1155 }, { "epoch": 0.5465721040189125, "grad_norm": 7.300600528717041, "learning_rate": 1.0123811575908166e-05, "loss": 0.3028, "step": 1156 }, { "epoch": 0.5470449172576832, "grad_norm": 4.2444071769714355, "learning_rate": 1.0107304048153372e-05, "loss": 0.1432, "step": 1157 }, { "epoch": 0.5475177304964539, "grad_norm": 5.09889030456543, "learning_rate": 1.0090796227957633e-05, "loss": 0.2697, "step": 1158 }, { "epoch": 0.5479905437352246, "grad_norm": 5.837294101715088, "learning_rate": 1.0074288160310514e-05, "loss": 0.2371, "step": 1159 }, { "epoch": 0.5484633569739953, "grad_norm": 6.027414798736572, "learning_rate": 1.0057779890202259e-05, "loss": 0.2864, "step": 1160 }, { "epoch": 0.5484633569739953, "eval_accuracy": 0.8514412416851441, "eval_f1": 0.6731707317073171, "eval_loss": 0.32011911273002625, "eval_precision": 0.8789808917197452, "eval_recall": 0.5454545454545454, "eval_runtime": 48.0201, "eval_samples_per_second": 5.748, "eval_steps_per_second": 0.187, "step": 1160 }, { "epoch": 0.548936170212766, "grad_norm": 3.9856438636779785, "learning_rate": 1.0041271462623658e-05, "loss": 0.2113, "step": 1161 }, { "epoch": 0.5494089834515367, "grad_norm": 4.562050819396973, "learning_rate": 1.0024762922565933e-05, "loss": 0.2173, "step": 1162 }, { "epoch": 0.5498817966903073, "grad_norm": 4.3589558601379395, "learning_rate": 1.0008254315020607e-05, "loss": 0.185, "step": 1163 }, { "epoch": 0.550354609929078, "grad_norm": 5.3740620613098145, "learning_rate": 9.991745684979394e-06, "loss": 0.2472, "step": 1164 }, { "epoch": 0.5508274231678487, "grad_norm": 5.081512451171875, "learning_rate": 9.97523707743407e-06, "loss": 0.219, "step": 1165 }, { "epoch": 0.5513002364066194, "grad_norm": 4.1024346351623535, "learning_rate": 9.958728537376345e-06, "loss": 0.1668, "step": 1166 }, { "epoch": 0.5517730496453901, "grad_norm": 3.816474199295044, "learning_rate": 9.942220109797746e-06, "loss": 0.2022, "step": 1167 }, { "epoch": 0.5522458628841608, "grad_norm": 5.035168647766113, "learning_rate": 9.925711839689487e-06, "loss": 0.2188, "step": 1168 }, { "epoch": 0.5527186761229315, "grad_norm": 5.621501922607422, "learning_rate": 9.909203772042369e-06, "loss": 0.2612, "step": 1169 }, { "epoch": 0.5531914893617021, "grad_norm": 3.7916884422302246, "learning_rate": 9.892695951846631e-06, "loss": 0.1537, "step": 1170 }, { "epoch": 0.5536643026004728, "grad_norm": 6.460813045501709, "learning_rate": 9.876188424091837e-06, "loss": 0.2258, "step": 1171 }, { "epoch": 0.5541371158392435, "grad_norm": 5.5038604736328125, "learning_rate": 9.859681233766756e-06, "loss": 0.1853, "step": 1172 }, { "epoch": 0.5546099290780142, "grad_norm": 9.788790702819824, "learning_rate": 9.843174425859231e-06, "loss": 0.3384, "step": 1173 }, { "epoch": 0.5550827423167849, "grad_norm": 8.492478370666504, "learning_rate": 9.826668045356078e-06, "loss": 0.2906, "step": 1174 }, { "epoch": 0.5555555555555556, "grad_norm": 5.153669357299805, "learning_rate": 9.810162137242935e-06, "loss": 0.215, "step": 1175 }, { "epoch": 0.5560283687943263, "grad_norm": 6.732087135314941, "learning_rate": 9.793656746504155e-06, "loss": 0.2921, "step": 1176 }, { "epoch": 0.556501182033097, "grad_norm": 4.027410507202148, "learning_rate": 9.777151918122684e-06, "loss": 0.1983, "step": 1177 }, { "epoch": 0.5569739952718676, "grad_norm": 5.1011061668396, "learning_rate": 9.760647697079936e-06, "loss": 0.2095, "step": 1178 }, { "epoch": 0.5574468085106383, "grad_norm": 5.0621538162231445, "learning_rate": 9.744144128355665e-06, "loss": 0.1637, "step": 1179 }, { "epoch": 0.557919621749409, "grad_norm": 6.033471584320068, "learning_rate": 9.72764125692785e-06, "loss": 0.2342, "step": 1180 }, { "epoch": 0.557919621749409, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7252252252252253, "eval_loss": 0.3139636218547821, "eval_precision": 0.8429319371727748, "eval_recall": 0.6363636363636364, "eval_runtime": 48.235, "eval_samples_per_second": 5.722, "eval_steps_per_second": 0.187, "step": 1180 }, { "epoch": 0.5583924349881797, "grad_norm": 6.157944679260254, "learning_rate": 9.711139127772568e-06, "loss": 0.2866, "step": 1181 }, { "epoch": 0.5588652482269504, "grad_norm": 5.42804479598999, "learning_rate": 9.69463778586388e-06, "loss": 0.1551, "step": 1182 }, { "epoch": 0.5593380614657211, "grad_norm": 5.639116287231445, "learning_rate": 9.678137276173692e-06, "loss": 0.1956, "step": 1183 }, { "epoch": 0.5598108747044918, "grad_norm": 6.913265705108643, "learning_rate": 9.661637643671647e-06, "loss": 0.2281, "step": 1184 }, { "epoch": 0.5602836879432624, "grad_norm": 5.190281867980957, "learning_rate": 9.645138933324994e-06, "loss": 0.1959, "step": 1185 }, { "epoch": 0.5607565011820331, "grad_norm": 4.637751579284668, "learning_rate": 9.628641190098473e-06, "loss": 0.2019, "step": 1186 }, { "epoch": 0.5612293144208038, "grad_norm": 4.302716255187988, "learning_rate": 9.612144458954189e-06, "loss": 0.1699, "step": 1187 }, { "epoch": 0.5617021276595745, "grad_norm": 6.4986395835876465, "learning_rate": 9.59564878485148e-06, "loss": 0.2111, "step": 1188 }, { "epoch": 0.5621749408983452, "grad_norm": 6.432104587554932, "learning_rate": 9.579154212746815e-06, "loss": 0.199, "step": 1189 }, { "epoch": 0.5626477541371159, "grad_norm": 5.450148105621338, "learning_rate": 9.56266078759365e-06, "loss": 0.2335, "step": 1190 }, { "epoch": 0.5631205673758866, "grad_norm": 5.353931427001953, "learning_rate": 9.546168554342323e-06, "loss": 0.1919, "step": 1191 }, { "epoch": 0.5635933806146572, "grad_norm": 5.608835220336914, "learning_rate": 9.529677557939916e-06, "loss": 0.2217, "step": 1192 }, { "epoch": 0.5640661938534279, "grad_norm": 7.61819314956665, "learning_rate": 9.513187843330146e-06, "loss": 0.2864, "step": 1193 }, { "epoch": 0.5645390070921986, "grad_norm": 7.839981555938721, "learning_rate": 9.496699455453232e-06, "loss": 0.2923, "step": 1194 }, { "epoch": 0.5650118203309693, "grad_norm": 4.617547035217285, "learning_rate": 9.480212439245785e-06, "loss": 0.1815, "step": 1195 }, { "epoch": 0.56548463356974, "grad_norm": 5.598609924316406, "learning_rate": 9.463726839640667e-06, "loss": 0.238, "step": 1196 }, { "epoch": 0.5659574468085107, "grad_norm": 5.537100791931152, "learning_rate": 9.44724270156689e-06, "loss": 0.1757, "step": 1197 }, { "epoch": 0.5664302600472814, "grad_norm": 4.509025573730469, "learning_rate": 9.430760069949473e-06, "loss": 0.2335, "step": 1198 }, { "epoch": 0.566903073286052, "grad_norm": 6.317657470703125, "learning_rate": 9.414278989709334e-06, "loss": 0.1729, "step": 1199 }, { "epoch": 0.5673758865248227, "grad_norm": 4.740533351898193, "learning_rate": 9.397799505763167e-06, "loss": 0.1366, "step": 1200 }, { "epoch": 0.5673758865248227, "eval_accuracy": 0.8680709534368071, "eval_f1": 0.7361419068736141, "eval_loss": 0.30103373527526855, "eval_precision": 0.8383838383838383, "eval_recall": 0.6561264822134387, "eval_runtime": 47.2489, "eval_samples_per_second": 5.841, "eval_steps_per_second": 0.19, "step": 1200 }, { "epoch": 0.5678486997635934, "grad_norm": 6.263066291809082, "learning_rate": 9.381321663023308e-06, "loss": 0.2202, "step": 1201 }, { "epoch": 0.5683215130023641, "grad_norm": 6.9543070793151855, "learning_rate": 9.364845506397625e-06, "loss": 0.1869, "step": 1202 }, { "epoch": 0.5687943262411348, "grad_norm": 4.8995513916015625, "learning_rate": 9.348371080789387e-06, "loss": 0.2227, "step": 1203 }, { "epoch": 0.5692671394799055, "grad_norm": 3.913970470428467, "learning_rate": 9.331898431097153e-06, "loss": 0.1941, "step": 1204 }, { "epoch": 0.5697399527186762, "grad_norm": 4.263607025146484, "learning_rate": 9.315427602214631e-06, "loss": 0.2026, "step": 1205 }, { "epoch": 0.5702127659574469, "grad_norm": 4.99878454208374, "learning_rate": 9.298958639030577e-06, "loss": 0.1717, "step": 1206 }, { "epoch": 0.5706855791962175, "grad_norm": 4.628468036651611, "learning_rate": 9.282491586428655e-06, "loss": 0.1845, "step": 1207 }, { "epoch": 0.5711583924349882, "grad_norm": 6.5551533699035645, "learning_rate": 9.266026489287323e-06, "loss": 0.2557, "step": 1208 }, { "epoch": 0.5716312056737589, "grad_norm": 5.44743013381958, "learning_rate": 9.249563392479715e-06, "loss": 0.2666, "step": 1209 }, { "epoch": 0.5721040189125296, "grad_norm": 5.58568000793457, "learning_rate": 9.23310234087351e-06, "loss": 0.2257, "step": 1210 }, { "epoch": 0.5725768321513003, "grad_norm": 7.3130574226379395, "learning_rate": 9.21664337933081e-06, "loss": 0.3227, "step": 1211 }, { "epoch": 0.573049645390071, "grad_norm": 6.498375415802002, "learning_rate": 9.200186552708023e-06, "loss": 0.2122, "step": 1212 }, { "epoch": 0.5735224586288417, "grad_norm": 5.47324275970459, "learning_rate": 9.183731905855746e-06, "loss": 0.243, "step": 1213 }, { "epoch": 0.5739952718676123, "grad_norm": 5.8507866859436035, "learning_rate": 9.167279483618623e-06, "loss": 0.1633, "step": 1214 }, { "epoch": 0.574468085106383, "grad_norm": 4.788534641265869, "learning_rate": 9.150829330835241e-06, "loss": 0.182, "step": 1215 }, { "epoch": 0.5749408983451537, "grad_norm": 7.402541160583496, "learning_rate": 9.134381492338e-06, "loss": 0.3063, "step": 1216 }, { "epoch": 0.5754137115839244, "grad_norm": 4.93443489074707, "learning_rate": 9.117936012953002e-06, "loss": 0.208, "step": 1217 }, { "epoch": 0.5758865248226951, "grad_norm": 8.154093742370605, "learning_rate": 9.101492937499909e-06, "loss": 0.2389, "step": 1218 }, { "epoch": 0.5763593380614658, "grad_norm": 7.1925368309021, "learning_rate": 9.08505231079183e-06, "loss": 0.3203, "step": 1219 }, { "epoch": 0.5768321513002365, "grad_norm": 7.500906467437744, "learning_rate": 9.068614177635211e-06, "loss": 0.2301, "step": 1220 }, { "epoch": 0.5768321513002365, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7142857142857143, "eval_loss": 0.30109164118766785, "eval_precision": 0.856353591160221, "eval_recall": 0.6126482213438735, "eval_runtime": 47.0544, "eval_samples_per_second": 5.866, "eval_steps_per_second": 0.191, "step": 1220 }, { "epoch": 0.577304964539007, "grad_norm": 5.455466270446777, "learning_rate": 9.052178582829687e-06, "loss": 0.2111, "step": 1221 }, { "epoch": 0.5777777777777777, "grad_norm": 4.1028618812561035, "learning_rate": 9.035745571167996e-06, "loss": 0.206, "step": 1222 }, { "epoch": 0.5782505910165484, "grad_norm": 4.987546443939209, "learning_rate": 9.01931518743582e-06, "loss": 0.2396, "step": 1223 }, { "epoch": 0.5787234042553191, "grad_norm": 9.012516975402832, "learning_rate": 9.002887476411681e-06, "loss": 0.3507, "step": 1224 }, { "epoch": 0.5791962174940898, "grad_norm": 6.798236846923828, "learning_rate": 8.986462482866817e-06, "loss": 0.2712, "step": 1225 }, { "epoch": 0.5796690307328605, "grad_norm": 5.508780479431152, "learning_rate": 8.970040251565068e-06, "loss": 0.2785, "step": 1226 }, { "epoch": 0.5801418439716312, "grad_norm": 3.585559606552124, "learning_rate": 8.953620827262739e-06, "loss": 0.1821, "step": 1227 }, { "epoch": 0.5806146572104018, "grad_norm": 4.184317588806152, "learning_rate": 8.937204254708486e-06, "loss": 0.2308, "step": 1228 }, { "epoch": 0.5810874704491725, "grad_norm": 4.5350518226623535, "learning_rate": 8.920790578643186e-06, "loss": 0.2078, "step": 1229 }, { "epoch": 0.5815602836879432, "grad_norm": 5.0740742683410645, "learning_rate": 8.904379843799838e-06, "loss": 0.2313, "step": 1230 }, { "epoch": 0.5820330969267139, "grad_norm": 6.0607147216796875, "learning_rate": 8.887972094903412e-06, "loss": 0.2334, "step": 1231 }, { "epoch": 0.5825059101654846, "grad_norm": 5.125598430633545, "learning_rate": 8.871567376670747e-06, "loss": 0.2739, "step": 1232 }, { "epoch": 0.5829787234042553, "grad_norm": 7.169873237609863, "learning_rate": 8.85516573381042e-06, "loss": 0.2739, "step": 1233 }, { "epoch": 0.583451536643026, "grad_norm": 6.202165603637695, "learning_rate": 8.838767211022616e-06, "loss": 0.3156, "step": 1234 }, { "epoch": 0.5839243498817966, "grad_norm": 5.986494541168213, "learning_rate": 8.82237185299904e-06, "loss": 0.1882, "step": 1235 }, { "epoch": 0.5843971631205673, "grad_norm": 4.3089470863342285, "learning_rate": 8.805979704422758e-06, "loss": 0.1905, "step": 1236 }, { "epoch": 0.584869976359338, "grad_norm": 4.750925540924072, "learning_rate": 8.789590809968082e-06, "loss": 0.2272, "step": 1237 }, { "epoch": 0.5853427895981087, "grad_norm": 4.529053688049316, "learning_rate": 8.773205214300469e-06, "loss": 0.2583, "step": 1238 }, { "epoch": 0.5858156028368794, "grad_norm": 5.315147876739502, "learning_rate": 8.756822962076382e-06, "loss": 0.2463, "step": 1239 }, { "epoch": 0.5862884160756501, "grad_norm": 5.5175909996032715, "learning_rate": 8.740444097943166e-06, "loss": 0.2873, "step": 1240 }, { "epoch": 0.5862884160756501, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7142857142857143, "eval_loss": 0.30490389466285706, "eval_precision": 0.856353591160221, "eval_recall": 0.6126482213438735, "eval_runtime": 47.7126, "eval_samples_per_second": 5.785, "eval_steps_per_second": 0.189, "step": 1240 }, { "epoch": 0.5867612293144208, "grad_norm": 5.6907572746276855, "learning_rate": 8.724068666538938e-06, "loss": 0.2456, "step": 1241 }, { "epoch": 0.5872340425531914, "grad_norm": 5.550398826599121, "learning_rate": 8.707696712492455e-06, "loss": 0.2122, "step": 1242 }, { "epoch": 0.5877068557919621, "grad_norm": 5.554051876068115, "learning_rate": 8.691328280423004e-06, "loss": 0.1672, "step": 1243 }, { "epoch": 0.5881796690307328, "grad_norm": 5.504934787750244, "learning_rate": 8.674963414940271e-06, "loss": 0.1918, "step": 1244 }, { "epoch": 0.5886524822695035, "grad_norm": 6.041418075561523, "learning_rate": 8.658602160644216e-06, "loss": 0.2718, "step": 1245 }, { "epoch": 0.5891252955082742, "grad_norm": 6.632382392883301, "learning_rate": 8.642244562124962e-06, "loss": 0.316, "step": 1246 }, { "epoch": 0.5895981087470449, "grad_norm": 4.766592502593994, "learning_rate": 8.625890663962669e-06, "loss": 0.2298, "step": 1247 }, { "epoch": 0.5900709219858156, "grad_norm": 5.895883560180664, "learning_rate": 8.609540510727412e-06, "loss": 0.2365, "step": 1248 }, { "epoch": 0.5905437352245863, "grad_norm": 5.390053749084473, "learning_rate": 8.593194146979059e-06, "loss": 0.1977, "step": 1249 }, { "epoch": 0.5910165484633569, "grad_norm": 7.53000020980835, "learning_rate": 8.576851617267151e-06, "loss": 0.2733, "step": 1250 }, { "epoch": 0.5914893617021276, "grad_norm": 5.143542766571045, "learning_rate": 8.560512966130775e-06, "loss": 0.2405, "step": 1251 }, { "epoch": 0.5919621749408983, "grad_norm": 5.577294826507568, "learning_rate": 8.544178238098458e-06, "loss": 0.2378, "step": 1252 }, { "epoch": 0.592434988179669, "grad_norm": 4.410736083984375, "learning_rate": 8.527847477688027e-06, "loss": 0.2437, "step": 1253 }, { "epoch": 0.5929078014184397, "grad_norm": 6.536932945251465, "learning_rate": 8.511520729406498e-06, "loss": 0.2503, "step": 1254 }, { "epoch": 0.5933806146572104, "grad_norm": 5.56400728225708, "learning_rate": 8.49519803774995e-06, "loss": 0.2324, "step": 1255 }, { "epoch": 0.5938534278959811, "grad_norm": 4.7766923904418945, "learning_rate": 8.478879447203411e-06, "loss": 0.1441, "step": 1256 }, { "epoch": 0.5943262411347517, "grad_norm": 5.121423721313477, "learning_rate": 8.462565002240733e-06, "loss": 0.2649, "step": 1257 }, { "epoch": 0.5947990543735224, "grad_norm": 4.3107404708862305, "learning_rate": 8.446254747324462e-06, "loss": 0.1711, "step": 1258 }, { "epoch": 0.5952718676122931, "grad_norm": 5.047919273376465, "learning_rate": 8.42994872690573e-06, "loss": 0.2281, "step": 1259 }, { "epoch": 0.5957446808510638, "grad_norm": 6.451530933380127, "learning_rate": 8.413646985424127e-06, "loss": 0.2467, "step": 1260 }, { "epoch": 0.5957446808510638, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7102803738317757, "eval_loss": 0.3106723129749298, "eval_precision": 0.8685714285714285, "eval_recall": 0.6007905138339921, "eval_runtime": 48.2804, "eval_samples_per_second": 5.717, "eval_steps_per_second": 0.186, "step": 1260 }, { "epoch": 0.5962174940898345, "grad_norm": 5.580362796783447, "learning_rate": 8.397349567307586e-06, "loss": 0.2108, "step": 1261 }, { "epoch": 0.5966903073286052, "grad_norm": 4.948899745941162, "learning_rate": 8.381056516972253e-06, "loss": 0.2347, "step": 1262 }, { "epoch": 0.5971631205673759, "grad_norm": 3.7529051303863525, "learning_rate": 8.364767878822368e-06, "loss": 0.1665, "step": 1263 }, { "epoch": 0.5976359338061465, "grad_norm": 7.008377552032471, "learning_rate": 8.34848369725015e-06, "loss": 0.2132, "step": 1264 }, { "epoch": 0.5981087470449172, "grad_norm": 5.252836227416992, "learning_rate": 8.332204016635672e-06, "loss": 0.1941, "step": 1265 }, { "epoch": 0.5985815602836879, "grad_norm": 6.382559776306152, "learning_rate": 8.31592888134674e-06, "loss": 0.2376, "step": 1266 }, { "epoch": 0.5990543735224586, "grad_norm": 6.636437892913818, "learning_rate": 8.299658335738772e-06, "loss": 0.3327, "step": 1267 }, { "epoch": 0.5995271867612293, "grad_norm": 5.242986679077148, "learning_rate": 8.28339242415468e-06, "loss": 0.2062, "step": 1268 }, { "epoch": 0.6, "grad_norm": 5.419365882873535, "learning_rate": 8.267131190924737e-06, "loss": 0.2488, "step": 1269 }, { "epoch": 0.6004728132387707, "grad_norm": 6.189310550689697, "learning_rate": 8.25087468036648e-06, "loss": 0.2743, "step": 1270 }, { "epoch": 0.6009456264775414, "grad_norm": 5.147764205932617, "learning_rate": 8.234622936784566e-06, "loss": 0.1907, "step": 1271 }, { "epoch": 0.601418439716312, "grad_norm": 5.64257287979126, "learning_rate": 8.218376004470665e-06, "loss": 0.2655, "step": 1272 }, { "epoch": 0.6018912529550827, "grad_norm": 3.5520944595336914, "learning_rate": 8.202133927703324e-06, "loss": 0.1818, "step": 1273 }, { "epoch": 0.6023640661938534, "grad_norm": 5.096825122833252, "learning_rate": 8.185896750747878e-06, "loss": 0.1918, "step": 1274 }, { "epoch": 0.6028368794326241, "grad_norm": 5.38516092300415, "learning_rate": 8.169664517856287e-06, "loss": 0.2708, "step": 1275 }, { "epoch": 0.6033096926713948, "grad_norm": 5.871916770935059, "learning_rate": 8.153437273267045e-06, "loss": 0.1947, "step": 1276 }, { "epoch": 0.6037825059101655, "grad_norm": 4.89730167388916, "learning_rate": 8.137215061205049e-06, "loss": 0.2103, "step": 1277 }, { "epoch": 0.6042553191489362, "grad_norm": 5.5777587890625, "learning_rate": 8.120997925881492e-06, "loss": 0.2599, "step": 1278 }, { "epoch": 0.6047281323877068, "grad_norm": 4.445948600769043, "learning_rate": 8.10478591149372e-06, "loss": 0.2191, "step": 1279 }, { "epoch": 0.6052009456264775, "grad_norm": 7.9579267501831055, "learning_rate": 8.088579062225116e-06, "loss": 0.3175, "step": 1280 }, { "epoch": 0.6052009456264775, "eval_accuracy": 0.8580931263858093, "eval_f1": 0.69377990430622, "eval_loss": 0.312032550573349, "eval_precision": 0.8787878787878788, "eval_recall": 0.5731225296442688, "eval_runtime": 50.2837, "eval_samples_per_second": 5.489, "eval_steps_per_second": 0.179, "step": 1280 }, { "epoch": 0.6056737588652482, "grad_norm": 3.942072629928589, "learning_rate": 8.072377422245002e-06, "loss": 0.1949, "step": 1281 }, { "epoch": 0.6061465721040189, "grad_norm": 4.524231910705566, "learning_rate": 8.05618103570849e-06, "loss": 0.1867, "step": 1282 }, { "epoch": 0.6066193853427896, "grad_norm": 5.806196689605713, "learning_rate": 8.039989946756388e-06, "loss": 0.2334, "step": 1283 }, { "epoch": 0.6070921985815603, "grad_norm": 5.2232489585876465, "learning_rate": 8.02380419951506e-06, "loss": 0.2788, "step": 1284 }, { "epoch": 0.607565011820331, "grad_norm": 6.341989517211914, "learning_rate": 8.0076238380963e-06, "loss": 0.2481, "step": 1285 }, { "epoch": 0.6080378250591016, "grad_norm": 5.141717433929443, "learning_rate": 7.991448906597237e-06, "loss": 0.2083, "step": 1286 }, { "epoch": 0.6085106382978723, "grad_norm": 5.809133052825928, "learning_rate": 7.975279449100207e-06, "loss": 0.2377, "step": 1287 }, { "epoch": 0.608983451536643, "grad_norm": 4.600372314453125, "learning_rate": 7.959115509672612e-06, "loss": 0.2026, "step": 1288 }, { "epoch": 0.6094562647754137, "grad_norm": 7.412517547607422, "learning_rate": 7.942957132366827e-06, "loss": 0.3106, "step": 1289 }, { "epoch": 0.6099290780141844, "grad_norm": 10.773149490356445, "learning_rate": 7.926804361220056e-06, "loss": 0.2309, "step": 1290 }, { "epoch": 0.6104018912529551, "grad_norm": 4.44931173324585, "learning_rate": 7.910657240254242e-06, "loss": 0.2072, "step": 1291 }, { "epoch": 0.6108747044917258, "grad_norm": 6.045795917510986, "learning_rate": 7.894515813475914e-06, "loss": 0.2879, "step": 1292 }, { "epoch": 0.6113475177304964, "grad_norm": 4.986977577209473, "learning_rate": 7.87838012487609e-06, "loss": 0.1959, "step": 1293 }, { "epoch": 0.6118203309692671, "grad_norm": 6.099925518035889, "learning_rate": 7.862250218430147e-06, "loss": 0.2966, "step": 1294 }, { "epoch": 0.6122931442080378, "grad_norm": 5.837856292724609, "learning_rate": 7.846126138097698e-06, "loss": 0.2563, "step": 1295 }, { "epoch": 0.6127659574468085, "grad_norm": 6.82401704788208, "learning_rate": 7.830007927822494e-06, "loss": 0.1892, "step": 1296 }, { "epoch": 0.6132387706855792, "grad_norm": 6.041834354400635, "learning_rate": 7.813895631532271e-06, "loss": 0.1974, "step": 1297 }, { "epoch": 0.6137115839243499, "grad_norm": 5.327773094177246, "learning_rate": 7.797789293138657e-06, "loss": 0.2551, "step": 1298 }, { "epoch": 0.6141843971631206, "grad_norm": 3.487072467803955, "learning_rate": 7.781688956537034e-06, "loss": 0.1987, "step": 1299 }, { "epoch": 0.6146572104018913, "grad_norm": 4.819819450378418, "learning_rate": 7.765594665606441e-06, "loss": 0.1988, "step": 1300 }, { "epoch": 0.6146572104018913, "eval_accuracy": 0.8636363636363636, "eval_f1": 0.7146171693735499, "eval_loss": 0.3020324409008026, "eval_precision": 0.8651685393258427, "eval_recall": 0.6086956521739131, "eval_runtime": 48.6185, "eval_samples_per_second": 5.677, "eval_steps_per_second": 0.185, "step": 1300 }, { "epoch": 0.6151300236406619, "grad_norm": 5.6234612464904785, "learning_rate": 7.749506464209428e-06, "loss": 0.2889, "step": 1301 }, { "epoch": 0.6156028368794326, "grad_norm": 5.582950592041016, "learning_rate": 7.733424396191955e-06, "loss": 0.2902, "step": 1302 }, { "epoch": 0.6160756501182033, "grad_norm": 5.391469955444336, "learning_rate": 7.71734850538326e-06, "loss": 0.213, "step": 1303 }, { "epoch": 0.616548463356974, "grad_norm": 4.6382060050964355, "learning_rate": 7.701278835595753e-06, "loss": 0.1684, "step": 1304 }, { "epoch": 0.6170212765957447, "grad_norm": 8.098640441894531, "learning_rate": 7.685215430624891e-06, "loss": 0.4206, "step": 1305 }, { "epoch": 0.6174940898345154, "grad_norm": 4.473232746124268, "learning_rate": 7.669158334249048e-06, "loss": 0.243, "step": 1306 }, { "epoch": 0.6179669030732861, "grad_norm": 5.509943008422852, "learning_rate": 7.65310759022941e-06, "loss": 0.1861, "step": 1307 }, { "epoch": 0.6184397163120567, "grad_norm": 6.489039421081543, "learning_rate": 7.637063242309852e-06, "loss": 0.2912, "step": 1308 }, { "epoch": 0.6189125295508274, "grad_norm": 4.711176872253418, "learning_rate": 7.621025334216819e-06, "loss": 0.243, "step": 1309 }, { "epoch": 0.6193853427895981, "grad_norm": 5.736166000366211, "learning_rate": 7.604993909659198e-06, "loss": 0.2759, "step": 1310 }, { "epoch": 0.6198581560283688, "grad_norm": 7.324904441833496, "learning_rate": 7.588969012328214e-06, "loss": 0.2655, "step": 1311 }, { "epoch": 0.6203309692671395, "grad_norm": 5.770148754119873, "learning_rate": 7.572950685897295e-06, "loss": 0.2062, "step": 1312 }, { "epoch": 0.6208037825059102, "grad_norm": 5.873038291931152, "learning_rate": 7.556938974021969e-06, "loss": 0.2604, "step": 1313 }, { "epoch": 0.6212765957446809, "grad_norm": 5.717566013336182, "learning_rate": 7.540933920339733e-06, "loss": 0.1932, "step": 1314 }, { "epoch": 0.6217494089834515, "grad_norm": 5.060842514038086, "learning_rate": 7.524935568469939e-06, "loss": 0.2813, "step": 1315 }, { "epoch": 0.6222222222222222, "grad_norm": 5.583745002746582, "learning_rate": 7.50894396201367e-06, "loss": 0.23, "step": 1316 }, { "epoch": 0.6226950354609929, "grad_norm": 10.26961898803711, "learning_rate": 7.4929591445536336e-06, "loss": 0.2003, "step": 1317 }, { "epoch": 0.6231678486997636, "grad_norm": 4.721004009246826, "learning_rate": 7.4769811596540285e-06, "loss": 0.1755, "step": 1318 }, { "epoch": 0.6236406619385343, "grad_norm": 4.784334659576416, "learning_rate": 7.461010050860438e-06, "loss": 0.2046, "step": 1319 }, { "epoch": 0.624113475177305, "grad_norm": 7.074143886566162, "learning_rate": 7.445045861699696e-06, "loss": 0.2081, "step": 1320 }, { "epoch": 0.624113475177305, "eval_accuracy": 0.8558758314855875, "eval_f1": 0.6859903381642513, "eval_loss": 0.31748807430267334, "eval_precision": 0.8819875776397516, "eval_recall": 0.5612648221343873, "eval_runtime": 47.9172, "eval_samples_per_second": 5.76, "eval_steps_per_second": 0.188, "step": 1320 }, { "epoch": 0.6245862884160757, "grad_norm": 6.400256156921387, "learning_rate": 7.429088635679786e-06, "loss": 0.2797, "step": 1321 }, { "epoch": 0.6250591016548463, "grad_norm": 7.948604583740234, "learning_rate": 7.413138416289716e-06, "loss": 0.2883, "step": 1322 }, { "epoch": 0.625531914893617, "grad_norm": 5.226047039031982, "learning_rate": 7.397195246999391e-06, "loss": 0.2944, "step": 1323 }, { "epoch": 0.6260047281323877, "grad_norm": 4.652298450469971, "learning_rate": 7.381259171259509e-06, "loss": 0.2375, "step": 1324 }, { "epoch": 0.6264775413711584, "grad_norm": 6.352631568908691, "learning_rate": 7.365330232501427e-06, "loss": 0.2923, "step": 1325 }, { "epoch": 0.6269503546099291, "grad_norm": 5.204030513763428, "learning_rate": 7.349408474137067e-06, "loss": 0.2485, "step": 1326 }, { "epoch": 0.6274231678486998, "grad_norm": 6.4170026779174805, "learning_rate": 7.333493939558764e-06, "loss": 0.3025, "step": 1327 }, { "epoch": 0.6278959810874705, "grad_norm": 5.421019077301025, "learning_rate": 7.317586672139177e-06, "loss": 0.2311, "step": 1328 }, { "epoch": 0.6283687943262412, "grad_norm": 6.363109111785889, "learning_rate": 7.301686715231149e-06, "loss": 0.244, "step": 1329 }, { "epoch": 0.6288416075650118, "grad_norm": 4.805910587310791, "learning_rate": 7.285794112167615e-06, "loss": 0.2314, "step": 1330 }, { "epoch": 0.6293144208037825, "grad_norm": 4.570178508758545, "learning_rate": 7.269908906261458e-06, "loss": 0.2186, "step": 1331 }, { "epoch": 0.6297872340425532, "grad_norm": 4.513207912445068, "learning_rate": 7.254031140805399e-06, "loss": 0.2176, "step": 1332 }, { "epoch": 0.6302600472813239, "grad_norm": 5.025672435760498, "learning_rate": 7.238160859071885e-06, "loss": 0.275, "step": 1333 }, { "epoch": 0.6307328605200946, "grad_norm": 5.059742450714111, "learning_rate": 7.222298104312966e-06, "loss": 0.2367, "step": 1334 }, { "epoch": 0.6312056737588653, "grad_norm": 5.431969165802002, "learning_rate": 7.206442919760186e-06, "loss": 0.24, "step": 1335 }, { "epoch": 0.631678486997636, "grad_norm": 11.056987762451172, "learning_rate": 7.190595348624447e-06, "loss": 0.3124, "step": 1336 }, { "epoch": 0.6321513002364066, "grad_norm": 6.1571197509765625, "learning_rate": 7.1747554340959055e-06, "loss": 0.2398, "step": 1337 }, { "epoch": 0.6326241134751773, "grad_norm": 5.703886032104492, "learning_rate": 7.158923219343845e-06, "loss": 0.2612, "step": 1338 }, { "epoch": 0.633096926713948, "grad_norm": 5.536457061767578, "learning_rate": 7.1430987475165834e-06, "loss": 0.2558, "step": 1339 }, { "epoch": 0.6335697399527187, "grad_norm": 4.489446640014648, "learning_rate": 7.127282061741316e-06, "loss": 0.1784, "step": 1340 }, { "epoch": 0.6335697399527187, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7324561403508771, "eval_loss": 0.2959369122982025, "eval_precision": 0.8226600985221675, "eval_recall": 0.6600790513833992, "eval_runtime": 49.1184, "eval_samples_per_second": 5.619, "eval_steps_per_second": 0.183, "step": 1340 }, { "epoch": 0.6340425531914894, "grad_norm": 4.977541923522949, "learning_rate": 7.11147320512403e-06, "loss": 0.2467, "step": 1341 }, { "epoch": 0.6345153664302601, "grad_norm": 4.624886989593506, "learning_rate": 7.095672220749367e-06, "loss": 0.258, "step": 1342 }, { "epoch": 0.6349881796690308, "grad_norm": 4.496685028076172, "learning_rate": 7.079879151680516e-06, "loss": 0.206, "step": 1343 }, { "epoch": 0.6354609929078014, "grad_norm": 5.0084919929504395, "learning_rate": 7.064094040959107e-06, "loss": 0.1829, "step": 1344 }, { "epoch": 0.6359338061465721, "grad_norm": 5.6750168800354, "learning_rate": 7.048316931605062e-06, "loss": 0.2466, "step": 1345 }, { "epoch": 0.6364066193853428, "grad_norm": 6.028310298919678, "learning_rate": 7.032547866616512e-06, "loss": 0.2048, "step": 1346 }, { "epoch": 0.6368794326241135, "grad_norm": 4.741923809051514, "learning_rate": 7.0167868889696445e-06, "loss": 0.1621, "step": 1347 }, { "epoch": 0.6373522458628842, "grad_norm": 5.013643264770508, "learning_rate": 7.001034041618632e-06, "loss": 0.3119, "step": 1348 }, { "epoch": 0.6378250591016549, "grad_norm": 7.612677097320557, "learning_rate": 6.985289367495469e-06, "loss": 0.3243, "step": 1349 }, { "epoch": 0.6382978723404256, "grad_norm": 6.442877769470215, "learning_rate": 6.969552909509885e-06, "loss": 0.2928, "step": 1350 }, { "epoch": 0.6387706855791963, "grad_norm": 4.449213981628418, "learning_rate": 6.953824710549212e-06, "loss": 0.1977, "step": 1351 }, { "epoch": 0.6392434988179669, "grad_norm": 5.301755905151367, "learning_rate": 6.938104813478279e-06, "loss": 0.2666, "step": 1352 }, { "epoch": 0.6397163120567376, "grad_norm": 4.733539581298828, "learning_rate": 6.922393261139284e-06, "loss": 0.1967, "step": 1353 }, { "epoch": 0.6401891252955083, "grad_norm": 5.527211666107178, "learning_rate": 6.9066900963516855e-06, "loss": 0.2261, "step": 1354 }, { "epoch": 0.640661938534279, "grad_norm": 7.788763999938965, "learning_rate": 6.8909953619120836e-06, "loss": 0.2244, "step": 1355 }, { "epoch": 0.6411347517730497, "grad_norm": 4.974414825439453, "learning_rate": 6.875309100594098e-06, "loss": 0.2021, "step": 1356 }, { "epoch": 0.6416075650118204, "grad_norm": 4.9365739822387695, "learning_rate": 6.859631355148266e-06, "loss": 0.1671, "step": 1357 }, { "epoch": 0.642080378250591, "grad_norm": 5.185995578765869, "learning_rate": 6.843962168301907e-06, "loss": 0.2056, "step": 1358 }, { "epoch": 0.6425531914893617, "grad_norm": 4.460386276245117, "learning_rate": 6.828301582759018e-06, "loss": 0.1665, "step": 1359 }, { "epoch": 0.6430260047281324, "grad_norm": 6.994537353515625, "learning_rate": 6.8126496412001545e-06, "loss": 0.2712, "step": 1360 }, { "epoch": 0.6430260047281324, "eval_accuracy": 0.8592017738359202, "eval_f1": 0.7066974595842956, "eval_loss": 0.31329602003097534, "eval_precision": 0.85, "eval_recall": 0.6047430830039525, "eval_runtime": 47.7506, "eval_samples_per_second": 5.78, "eval_steps_per_second": 0.188, "step": 1360 }, { "epoch": 0.6434988179669031, "grad_norm": 4.288215160369873, "learning_rate": 6.797006386282316e-06, "loss": 0.1407, "step": 1361 }, { "epoch": 0.6439716312056738, "grad_norm": 7.29041862487793, "learning_rate": 6.7813718606388255e-06, "loss": 0.2459, "step": 1362 }, { "epoch": 0.6444444444444445, "grad_norm": 4.9920268058776855, "learning_rate": 6.7657461068792164e-06, "loss": 0.2132, "step": 1363 }, { "epoch": 0.6449172576832152, "grad_norm": 5.254515171051025, "learning_rate": 6.750129167589113e-06, "loss": 0.2016, "step": 1364 }, { "epoch": 0.6453900709219859, "grad_norm": 6.032263278961182, "learning_rate": 6.734521085330126e-06, "loss": 0.1932, "step": 1365 }, { "epoch": 0.6458628841607565, "grad_norm": 4.635222434997559, "learning_rate": 6.718921902639717e-06, "loss": 0.202, "step": 1366 }, { "epoch": 0.6463356973995272, "grad_norm": 5.365309715270996, "learning_rate": 6.7033316620310985e-06, "loss": 0.2137, "step": 1367 }, { "epoch": 0.6468085106382979, "grad_norm": 4.981945991516113, "learning_rate": 6.687750405993113e-06, "loss": 0.2489, "step": 1368 }, { "epoch": 0.6472813238770686, "grad_norm": 6.213076591491699, "learning_rate": 6.672178176990112e-06, "loss": 0.2583, "step": 1369 }, { "epoch": 0.6477541371158393, "grad_norm": 8.723681449890137, "learning_rate": 6.656615017461854e-06, "loss": 0.2961, "step": 1370 }, { "epoch": 0.64822695035461, "grad_norm": 3.6889824867248535, "learning_rate": 6.641060969823372e-06, "loss": 0.1616, "step": 1371 }, { "epoch": 0.6486997635933807, "grad_norm": 5.324930667877197, "learning_rate": 6.625516076464871e-06, "loss": 0.2571, "step": 1372 }, { "epoch": 0.6491725768321513, "grad_norm": 7.705888748168945, "learning_rate": 6.6099803797516e-06, "loss": 0.3487, "step": 1373 }, { "epoch": 0.649645390070922, "grad_norm": 7.570559024810791, "learning_rate": 6.5944539220237555e-06, "loss": 0.2652, "step": 1374 }, { "epoch": 0.6501182033096927, "grad_norm": 5.115143299102783, "learning_rate": 6.578936745596346e-06, "loss": 0.1846, "step": 1375 }, { "epoch": 0.6505910165484634, "grad_norm": 5.2409162521362305, "learning_rate": 6.563428892759087e-06, "loss": 0.1869, "step": 1376 }, { "epoch": 0.6510638297872341, "grad_norm": 7.305501937866211, "learning_rate": 6.547930405776282e-06, "loss": 0.2298, "step": 1377 }, { "epoch": 0.6515366430260048, "grad_norm": 5.585699081420898, "learning_rate": 6.532441326886716e-06, "loss": 0.1531, "step": 1378 }, { "epoch": 0.6520094562647755, "grad_norm": 5.718395709991455, "learning_rate": 6.5169616983035285e-06, "loss": 0.2375, "step": 1379 }, { "epoch": 0.6524822695035462, "grad_norm": 7.011470317840576, "learning_rate": 6.501491562214104e-06, "loss": 0.2463, "step": 1380 }, { "epoch": 0.6524822695035462, "eval_accuracy": 0.8547671840354767, "eval_f1": 0.6960556844547564, "eval_loss": 0.3179844319820404, "eval_precision": 0.8426966292134831, "eval_recall": 0.5928853754940712, "eval_runtime": 48.0507, "eval_samples_per_second": 5.744, "eval_steps_per_second": 0.187, "step": 1380 }, { "epoch": 0.6529550827423168, "grad_norm": 6.096155643463135, "learning_rate": 6.486030960779956e-06, "loss": 0.2143, "step": 1381 }, { "epoch": 0.6534278959810875, "grad_norm": 6.085330486297607, "learning_rate": 6.470579936136612e-06, "loss": 0.2922, "step": 1382 }, { "epoch": 0.6539007092198581, "grad_norm": 4.465743541717529, "learning_rate": 6.455138530393508e-06, "loss": 0.2069, "step": 1383 }, { "epoch": 0.6543735224586288, "grad_norm": 8.364693641662598, "learning_rate": 6.4397067856338524e-06, "loss": 0.2768, "step": 1384 }, { "epoch": 0.6548463356973995, "grad_norm": 4.662283897399902, "learning_rate": 6.424284743914532e-06, "loss": 0.2401, "step": 1385 }, { "epoch": 0.6553191489361702, "grad_norm": 6.041397571563721, "learning_rate": 6.408872447265984e-06, "loss": 0.2707, "step": 1386 }, { "epoch": 0.6557919621749408, "grad_norm": 5.31654167175293, "learning_rate": 6.393469937692101e-06, "loss": 0.2028, "step": 1387 }, { "epoch": 0.6562647754137115, "grad_norm": 5.916751384735107, "learning_rate": 6.378077257170081e-06, "loss": 0.2362, "step": 1388 }, { "epoch": 0.6567375886524822, "grad_norm": 5.025276184082031, "learning_rate": 6.3626944476503485e-06, "loss": 0.2574, "step": 1389 }, { "epoch": 0.6572104018912529, "grad_norm": 9.537090301513672, "learning_rate": 6.34732155105642e-06, "loss": 0.3416, "step": 1390 }, { "epoch": 0.6576832151300236, "grad_norm": 4.489987850189209, "learning_rate": 6.331958609284806e-06, "loss": 0.2113, "step": 1391 }, { "epoch": 0.6581560283687943, "grad_norm": 6.175182819366455, "learning_rate": 6.316605664204878e-06, "loss": 0.2733, "step": 1392 }, { "epoch": 0.658628841607565, "grad_norm": 8.544486999511719, "learning_rate": 6.301262757658758e-06, "loss": 0.254, "step": 1393 }, { "epoch": 0.6591016548463356, "grad_norm": 7.164076328277588, "learning_rate": 6.285929931461218e-06, "loss": 0.2604, "step": 1394 }, { "epoch": 0.6595744680851063, "grad_norm": 7.023167133331299, "learning_rate": 6.2706072273995546e-06, "loss": 0.2135, "step": 1395 }, { "epoch": 0.660047281323877, "grad_norm": 3.9141695499420166, "learning_rate": 6.255294687233484e-06, "loss": 0.1969, "step": 1396 }, { "epoch": 0.6605200945626477, "grad_norm": 9.266695976257324, "learning_rate": 6.239992352695016e-06, "loss": 0.2915, "step": 1397 }, { "epoch": 0.6609929078014184, "grad_norm": 5.782726764678955, "learning_rate": 6.224700265488343e-06, "loss": 0.2525, "step": 1398 }, { "epoch": 0.6614657210401891, "grad_norm": 5.57002067565918, "learning_rate": 6.209418467289731e-06, "loss": 0.2601, "step": 1399 }, { "epoch": 0.6619385342789598, "grad_norm": 8.637619018554688, "learning_rate": 6.194146999747419e-06, "loss": 0.3991, "step": 1400 }, { "epoch": 0.6619385342789598, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7061611374407583, "eval_loss": 0.3167264759540558, "eval_precision": 0.8816568047337278, "eval_recall": 0.5889328063241107, "eval_runtime": 49.4666, "eval_samples_per_second": 5.58, "eval_steps_per_second": 0.182, "step": 1400 }, { "epoch": 0.6624113475177305, "grad_norm": 8.265732765197754, "learning_rate": 6.1788859044814755e-06, "loss": 0.369, "step": 1401 }, { "epoch": 0.6628841607565011, "grad_norm": 6.062003135681152, "learning_rate": 6.163635223083706e-06, "loss": 0.2058, "step": 1402 }, { "epoch": 0.6633569739952718, "grad_norm": 3.8213107585906982, "learning_rate": 6.148394997117532e-06, "loss": 0.1434, "step": 1403 }, { "epoch": 0.6638297872340425, "grad_norm": 4.005511283874512, "learning_rate": 6.133165268117885e-06, "loss": 0.2036, "step": 1404 }, { "epoch": 0.6643026004728132, "grad_norm": 5.579016208648682, "learning_rate": 6.117946077591087e-06, "loss": 0.2527, "step": 1405 }, { "epoch": 0.6647754137115839, "grad_norm": 8.702812194824219, "learning_rate": 6.102737467014739e-06, "loss": 0.2678, "step": 1406 }, { "epoch": 0.6652482269503546, "grad_norm": 5.449542999267578, "learning_rate": 6.087539477837609e-06, "loss": 0.2133, "step": 1407 }, { "epoch": 0.6657210401891253, "grad_norm": 5.406524181365967, "learning_rate": 6.072352151479508e-06, "loss": 0.2225, "step": 1408 }, { "epoch": 0.6661938534278959, "grad_norm": 7.414266586303711, "learning_rate": 6.057175529331205e-06, "loss": 0.2549, "step": 1409 }, { "epoch": 0.6666666666666666, "grad_norm": 6.921921730041504, "learning_rate": 6.0420096527542835e-06, "loss": 0.2658, "step": 1410 }, { "epoch": 0.6671394799054373, "grad_norm": 6.305230617523193, "learning_rate": 6.026854563081046e-06, "loss": 0.2819, "step": 1411 }, { "epoch": 0.667612293144208, "grad_norm": 5.2894511222839355, "learning_rate": 6.0117103016143915e-06, "loss": 0.1989, "step": 1412 }, { "epoch": 0.6680851063829787, "grad_norm": 7.54205846786499, "learning_rate": 5.996576909627718e-06, "loss": 0.2917, "step": 1413 }, { "epoch": 0.6685579196217494, "grad_norm": 3.777785539627075, "learning_rate": 5.981454428364792e-06, "loss": 0.1905, "step": 1414 }, { "epoch": 0.6690307328605201, "grad_norm": 4.611357688903809, "learning_rate": 5.96634289903965e-06, "loss": 0.2231, "step": 1415 }, { "epoch": 0.6695035460992907, "grad_norm": 9.586370468139648, "learning_rate": 5.951242362836475e-06, "loss": 0.2447, "step": 1416 }, { "epoch": 0.6699763593380614, "grad_norm": 5.429812431335449, "learning_rate": 5.936152860909492e-06, "loss": 0.2668, "step": 1417 }, { "epoch": 0.6704491725768321, "grad_norm": 4.418676376342773, "learning_rate": 5.921074434382861e-06, "loss": 0.216, "step": 1418 }, { "epoch": 0.6709219858156028, "grad_norm": 4.1734089851379395, "learning_rate": 5.906007124350547e-06, "loss": 0.1834, "step": 1419 }, { "epoch": 0.6713947990543735, "grad_norm": 5.029831886291504, "learning_rate": 5.8909509718762235e-06, "loss": 0.154, "step": 1420 }, { "epoch": 0.6713947990543735, "eval_accuracy": 0.8636363636363636, "eval_f1": 0.7146171693735499, "eval_loss": 0.30272066593170166, "eval_precision": 0.8651685393258427, "eval_recall": 0.6086956521739131, "eval_runtime": 47.8176, "eval_samples_per_second": 5.772, "eval_steps_per_second": 0.188, "step": 1420 }, { "epoch": 0.6718676122931442, "grad_norm": 4.48655366897583, "learning_rate": 5.875906017993156e-06, "loss": 0.1879, "step": 1421 }, { "epoch": 0.6723404255319149, "grad_norm": 5.680935382843018, "learning_rate": 5.8608723037040894e-06, "loss": 0.2809, "step": 1422 }, { "epoch": 0.6728132387706856, "grad_norm": 6.1803083419799805, "learning_rate": 5.845849869981137e-06, "loss": 0.195, "step": 1423 }, { "epoch": 0.6732860520094562, "grad_norm": 9.49524974822998, "learning_rate": 5.830838757765671e-06, "loss": 0.3723, "step": 1424 }, { "epoch": 0.6737588652482269, "grad_norm": 4.156935214996338, "learning_rate": 5.815839007968196e-06, "loss": 0.2042, "step": 1425 }, { "epoch": 0.6742316784869976, "grad_norm": 5.995987892150879, "learning_rate": 5.8008506614682714e-06, "loss": 0.2007, "step": 1426 }, { "epoch": 0.6747044917257683, "grad_norm": 4.0844645500183105, "learning_rate": 5.785873759114364e-06, "loss": 0.1592, "step": 1427 }, { "epoch": 0.675177304964539, "grad_norm": 7.70731782913208, "learning_rate": 5.770908341723752e-06, "loss": 0.2633, "step": 1428 }, { "epoch": 0.6756501182033097, "grad_norm": 4.959256649017334, "learning_rate": 5.755954450082417e-06, "loss": 0.2326, "step": 1429 }, { "epoch": 0.6761229314420804, "grad_norm": 7.28727912902832, "learning_rate": 5.741012124944925e-06, "loss": 0.3043, "step": 1430 }, { "epoch": 0.676595744680851, "grad_norm": 6.117763519287109, "learning_rate": 5.726081407034327e-06, "loss": 0.1876, "step": 1431 }, { "epoch": 0.6770685579196217, "grad_norm": 4.031482696533203, "learning_rate": 5.711162337042033e-06, "loss": 0.2204, "step": 1432 }, { "epoch": 0.6775413711583924, "grad_norm": 4.445287227630615, "learning_rate": 5.6962549556277134e-06, "loss": 0.1462, "step": 1433 }, { "epoch": 0.6780141843971631, "grad_norm": 5.196186542510986, "learning_rate": 5.681359303419169e-06, "loss": 0.2448, "step": 1434 }, { "epoch": 0.6784869976359338, "grad_norm": 5.29311990737915, "learning_rate": 5.666475421012256e-06, "loss": 0.1858, "step": 1435 }, { "epoch": 0.6789598108747045, "grad_norm": 4.104085445404053, "learning_rate": 5.651603348970741e-06, "loss": 0.1939, "step": 1436 }, { "epoch": 0.6794326241134752, "grad_norm": 6.132163047790527, "learning_rate": 5.636743127826205e-06, "loss": 0.2087, "step": 1437 }, { "epoch": 0.6799054373522458, "grad_norm": 5.092171669006348, "learning_rate": 5.621894798077928e-06, "loss": 0.1947, "step": 1438 }, { "epoch": 0.6803782505910165, "grad_norm": 5.59557580947876, "learning_rate": 5.607058400192793e-06, "loss": 0.219, "step": 1439 }, { "epoch": 0.6808510638297872, "grad_norm": 6.623174667358398, "learning_rate": 5.592233974605154e-06, "loss": 0.1944, "step": 1440 }, { "epoch": 0.6808510638297872, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7075471698113207, "eval_loss": 0.3171689808368683, "eval_precision": 0.8771929824561403, "eval_recall": 0.5928853754940712, "eval_runtime": 46.6382, "eval_samples_per_second": 5.918, "eval_steps_per_second": 0.193, "step": 1440 }, { "epoch": 0.6813238770685579, "grad_norm": 5.490980625152588, "learning_rate": 5.577421561716738e-06, "loss": 0.2387, "step": 1441 }, { "epoch": 0.6817966903073286, "grad_norm": 6.905361652374268, "learning_rate": 5.5626212018965344e-06, "loss": 0.214, "step": 1442 }, { "epoch": 0.6822695035460993, "grad_norm": 4.289844036102295, "learning_rate": 5.547832935480686e-06, "loss": 0.1257, "step": 1443 }, { "epoch": 0.68274231678487, "grad_norm": 5.823220729827881, "learning_rate": 5.533056802772374e-06, "loss": 0.256, "step": 1444 }, { "epoch": 0.6832151300236406, "grad_norm": 5.129421234130859, "learning_rate": 5.518292844041711e-06, "loss": 0.2614, "step": 1445 }, { "epoch": 0.6836879432624113, "grad_norm": 4.678886890411377, "learning_rate": 5.503541099525633e-06, "loss": 0.1629, "step": 1446 }, { "epoch": 0.684160756501182, "grad_norm": 8.273910522460938, "learning_rate": 5.488801609427783e-06, "loss": 0.2119, "step": 1447 }, { "epoch": 0.6846335697399527, "grad_norm": 4.722292900085449, "learning_rate": 5.474074413918418e-06, "loss": 0.1892, "step": 1448 }, { "epoch": 0.6851063829787234, "grad_norm": 6.923647880554199, "learning_rate": 5.459359553134278e-06, "loss": 0.2873, "step": 1449 }, { "epoch": 0.6855791962174941, "grad_norm": 4.769101619720459, "learning_rate": 5.444657067178487e-06, "loss": 0.163, "step": 1450 }, { "epoch": 0.6860520094562648, "grad_norm": 5.0238518714904785, "learning_rate": 5.429966996120446e-06, "loss": 0.1423, "step": 1451 }, { "epoch": 0.6865248226950355, "grad_norm": 6.839998245239258, "learning_rate": 5.415289379995723e-06, "loss": 0.2498, "step": 1452 }, { "epoch": 0.6869976359338061, "grad_norm": 5.654500484466553, "learning_rate": 5.400624258805935e-06, "loss": 0.1813, "step": 1453 }, { "epoch": 0.6874704491725768, "grad_norm": 6.542251110076904, "learning_rate": 5.385971672518653e-06, "loss": 0.1936, "step": 1454 }, { "epoch": 0.6879432624113475, "grad_norm": 6.039957046508789, "learning_rate": 5.371331661067284e-06, "loss": 0.1988, "step": 1455 }, { "epoch": 0.6884160756501182, "grad_norm": 6.784928321838379, "learning_rate": 5.356704264350958e-06, "loss": 0.244, "step": 1456 }, { "epoch": 0.6888888888888889, "grad_norm": 6.722204685211182, "learning_rate": 5.342089522234439e-06, "loss": 0.2621, "step": 1457 }, { "epoch": 0.6893617021276596, "grad_norm": 4.733044147491455, "learning_rate": 5.327487474547992e-06, "loss": 0.2154, "step": 1458 }, { "epoch": 0.6898345153664303, "grad_norm": 6.589846134185791, "learning_rate": 5.312898161087288e-06, "loss": 0.2647, "step": 1459 }, { "epoch": 0.6903073286052009, "grad_norm": 5.961693286895752, "learning_rate": 5.298321621613292e-06, "loss": 0.2434, "step": 1460 }, { "epoch": 0.6903073286052009, "eval_accuracy": 0.8691796008869179, "eval_f1": 0.7412280701754386, "eval_loss": 0.303468257188797, "eval_precision": 0.8325123152709359, "eval_recall": 0.6679841897233202, "eval_runtime": 47.3008, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.19, "step": 1460 }, { "epoch": 0.6907801418439716, "grad_norm": 7.759005069732666, "learning_rate": 5.283757895852156e-06, "loss": 0.2391, "step": 1461 }, { "epoch": 0.6912529550827423, "grad_norm": 7.777105331420898, "learning_rate": 5.269207023495112e-06, "loss": 0.2959, "step": 1462 }, { "epoch": 0.691725768321513, "grad_norm": 5.419680595397949, "learning_rate": 5.25466904419836e-06, "loss": 0.2324, "step": 1463 }, { "epoch": 0.6921985815602837, "grad_norm": 8.358044624328613, "learning_rate": 5.240143997582956e-06, "loss": 0.242, "step": 1464 }, { "epoch": 0.6926713947990544, "grad_norm": 7.01899528503418, "learning_rate": 5.2256319232347275e-06, "loss": 0.2361, "step": 1465 }, { "epoch": 0.6931442080378251, "grad_norm": 9.41346549987793, "learning_rate": 5.211132860704131e-06, "loss": 0.2523, "step": 1466 }, { "epoch": 0.6936170212765957, "grad_norm": 5.518009185791016, "learning_rate": 5.196646849506169e-06, "loss": 0.271, "step": 1467 }, { "epoch": 0.6940898345153664, "grad_norm": 8.015327453613281, "learning_rate": 5.18217392912027e-06, "loss": 0.1744, "step": 1468 }, { "epoch": 0.6945626477541371, "grad_norm": 6.272970199584961, "learning_rate": 5.16771413899019e-06, "loss": 0.2644, "step": 1469 }, { "epoch": 0.6950354609929078, "grad_norm": 5.460439682006836, "learning_rate": 5.153267518523899e-06, "loss": 0.1546, "step": 1470 }, { "epoch": 0.6955082742316785, "grad_norm": 4.355556488037109, "learning_rate": 5.1388341070934735e-06, "loss": 0.1737, "step": 1471 }, { "epoch": 0.6959810874704492, "grad_norm": 5.721870422363281, "learning_rate": 5.124413944034992e-06, "loss": 0.2474, "step": 1472 }, { "epoch": 0.6964539007092199, "grad_norm": 6.737970352172852, "learning_rate": 5.110007068648422e-06, "loss": 0.2093, "step": 1473 }, { "epoch": 0.6969267139479906, "grad_norm": 5.622700214385986, "learning_rate": 5.095613520197533e-06, "loss": 0.2962, "step": 1474 }, { "epoch": 0.6973995271867612, "grad_norm": 7.287775993347168, "learning_rate": 5.081233337909756e-06, "loss": 0.237, "step": 1475 }, { "epoch": 0.6978723404255319, "grad_norm": 5.515003204345703, "learning_rate": 5.066866560976102e-06, "loss": 0.2765, "step": 1476 }, { "epoch": 0.6983451536643026, "grad_norm": 6.383912563323975, "learning_rate": 5.052513228551048e-06, "loss": 0.234, "step": 1477 }, { "epoch": 0.6988179669030733, "grad_norm": 6.042670726776123, "learning_rate": 5.038173379752425e-06, "loss": 0.3074, "step": 1478 }, { "epoch": 0.699290780141844, "grad_norm": 7.7684831619262695, "learning_rate": 5.0238470536613315e-06, "loss": 0.2179, "step": 1479 }, { "epoch": 0.6997635933806147, "grad_norm": 6.858969211578369, "learning_rate": 5.009534289321991e-06, "loss": 0.2346, "step": 1480 }, { "epoch": 0.6997635933806147, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7089201877934272, "eval_loss": 0.3162979483604431, "eval_precision": 0.8728323699421965, "eval_recall": 0.5968379446640316, "eval_runtime": 47.1019, "eval_samples_per_second": 5.86, "eval_steps_per_second": 0.191, "step": 1480 }, { "epoch": 0.7002364066193854, "grad_norm": 4.629432201385498, "learning_rate": 4.99523512574168e-06, "loss": 0.1548, "step": 1481 }, { "epoch": 0.700709219858156, "grad_norm": 5.3954548835754395, "learning_rate": 4.9809496018906e-06, "loss": 0.2797, "step": 1482 }, { "epoch": 0.7011820330969267, "grad_norm": 5.510435104370117, "learning_rate": 4.9666777567017935e-06, "loss": 0.2455, "step": 1483 }, { "epoch": 0.7016548463356974, "grad_norm": 5.090349197387695, "learning_rate": 4.9524196290710095e-06, "loss": 0.2056, "step": 1484 }, { "epoch": 0.7021276595744681, "grad_norm": 3.495950698852539, "learning_rate": 4.938175257856618e-06, "loss": 0.1664, "step": 1485 }, { "epoch": 0.7026004728132388, "grad_norm": 5.755441188812256, "learning_rate": 4.9239446818794914e-06, "loss": 0.247, "step": 1486 }, { "epoch": 0.7030732860520095, "grad_norm": 6.540046215057373, "learning_rate": 4.90972793992292e-06, "loss": 0.2858, "step": 1487 }, { "epoch": 0.7035460992907802, "grad_norm": 6.471051216125488, "learning_rate": 4.89552507073248e-06, "loss": 0.1972, "step": 1488 }, { "epoch": 0.7040189125295508, "grad_norm": 4.746458530426025, "learning_rate": 4.881336113015939e-06, "loss": 0.2121, "step": 1489 }, { "epoch": 0.7044917257683215, "grad_norm": 4.542480945587158, "learning_rate": 4.867161105443158e-06, "loss": 0.2013, "step": 1490 }, { "epoch": 0.7049645390070922, "grad_norm": 5.85392951965332, "learning_rate": 4.853000086645965e-06, "loss": 0.2253, "step": 1491 }, { "epoch": 0.7054373522458629, "grad_norm": 6.674161911010742, "learning_rate": 4.838853095218085e-06, "loss": 0.2491, "step": 1492 }, { "epoch": 0.7059101654846336, "grad_norm": 4.372111797332764, "learning_rate": 4.824720169714997e-06, "loss": 0.1928, "step": 1493 }, { "epoch": 0.7063829787234043, "grad_norm": 6.305669784545898, "learning_rate": 4.8106013486538505e-06, "loss": 0.2462, "step": 1494 }, { "epoch": 0.706855791962175, "grad_norm": 5.79583215713501, "learning_rate": 4.796496670513354e-06, "loss": 0.2655, "step": 1495 }, { "epoch": 0.7073286052009456, "grad_norm": 6.358242511749268, "learning_rate": 4.782406173733678e-06, "loss": 0.1943, "step": 1496 }, { "epoch": 0.7078014184397163, "grad_norm": 6.863159656524658, "learning_rate": 4.768329896716337e-06, "loss": 0.2634, "step": 1497 }, { "epoch": 0.708274231678487, "grad_norm": 6.125742435455322, "learning_rate": 4.7542678778240925e-06, "loss": 0.2209, "step": 1498 }, { "epoch": 0.7087470449172577, "grad_norm": 5.767421722412109, "learning_rate": 4.74022015538085e-06, "loss": 0.2381, "step": 1499 }, { "epoch": 0.7092198581560284, "grad_norm": 5.935783863067627, "learning_rate": 4.72618676767155e-06, "loss": 0.2532, "step": 1500 }, { "epoch": 0.7092198581560284, "eval_accuracy": 0.8658536585365854, "eval_f1": 0.7328918322295805, "eval_loss": 0.2938072085380554, "eval_precision": 0.83, "eval_recall": 0.6561264822134387, "eval_runtime": 47.9899, "eval_samples_per_second": 5.751, "eval_steps_per_second": 0.188, "step": 1500 }, { "epoch": 0.7096926713947991, "grad_norm": 6.156116008758545, "learning_rate": 4.712167752942067e-06, "loss": 0.2343, "step": 1501 }, { "epoch": 0.7101654846335698, "grad_norm": 7.0279412269592285, "learning_rate": 4.698163149399104e-06, "loss": 0.2633, "step": 1502 }, { "epoch": 0.7106382978723405, "grad_norm": 5.053689002990723, "learning_rate": 4.68417299521009e-06, "loss": 0.2181, "step": 1503 }, { "epoch": 0.7111111111111111, "grad_norm": 5.44726037979126, "learning_rate": 4.670197328503067e-06, "loss": 0.2635, "step": 1504 }, { "epoch": 0.7115839243498818, "grad_norm": 6.307510852813721, "learning_rate": 4.656236187366607e-06, "loss": 0.2775, "step": 1505 }, { "epoch": 0.7120567375886525, "grad_norm": 7.774320602416992, "learning_rate": 4.642289609849686e-06, "loss": 0.2855, "step": 1506 }, { "epoch": 0.7125295508274232, "grad_norm": 7.836673736572266, "learning_rate": 4.628357633961589e-06, "loss": 0.3376, "step": 1507 }, { "epoch": 0.7130023640661939, "grad_norm": 6.827236175537109, "learning_rate": 4.614440297671806e-06, "loss": 0.2518, "step": 1508 }, { "epoch": 0.7134751773049646, "grad_norm": 4.354206085205078, "learning_rate": 4.600537638909933e-06, "loss": 0.2073, "step": 1509 }, { "epoch": 0.7139479905437353, "grad_norm": 5.115018367767334, "learning_rate": 4.586649695565563e-06, "loss": 0.2146, "step": 1510 }, { "epoch": 0.7144208037825059, "grad_norm": 6.804549694061279, "learning_rate": 4.572776505488181e-06, "loss": 0.2646, "step": 1511 }, { "epoch": 0.7148936170212766, "grad_norm": 5.3641581535339355, "learning_rate": 4.558918106487065e-06, "loss": 0.2636, "step": 1512 }, { "epoch": 0.7153664302600473, "grad_norm": 7.315088272094727, "learning_rate": 4.545074536331191e-06, "loss": 0.2897, "step": 1513 }, { "epoch": 0.715839243498818, "grad_norm": 5.816035747528076, "learning_rate": 4.531245832749112e-06, "loss": 0.2956, "step": 1514 }, { "epoch": 0.7163120567375887, "grad_norm": 4.486478328704834, "learning_rate": 4.517432033428864e-06, "loss": 0.2543, "step": 1515 }, { "epoch": 0.7167848699763594, "grad_norm": 4.364136219024658, "learning_rate": 4.5036331760178695e-06, "loss": 0.1811, "step": 1516 }, { "epoch": 0.7172576832151301, "grad_norm": 8.54796314239502, "learning_rate": 4.4898492981228245e-06, "loss": 0.237, "step": 1517 }, { "epoch": 0.7177304964539007, "grad_norm": 6.10654354095459, "learning_rate": 4.4760804373096036e-06, "loss": 0.2353, "step": 1518 }, { "epoch": 0.7182033096926714, "grad_norm": 4.113856792449951, "learning_rate": 4.46232663110315e-06, "loss": 0.1487, "step": 1519 }, { "epoch": 0.7186761229314421, "grad_norm": 3.579453468322754, "learning_rate": 4.448587916987384e-06, "loss": 0.1815, "step": 1520 }, { "epoch": 0.7186761229314421, "eval_accuracy": 0.8569844789356984, "eval_f1": 0.6861313868613139, "eval_loss": 0.3156262934207916, "eval_precision": 0.8924050632911392, "eval_recall": 0.5573122529644269, "eval_runtime": 47.8319, "eval_samples_per_second": 5.77, "eval_steps_per_second": 0.188, "step": 1520 }, { "epoch": 0.7191489361702128, "grad_norm": 3.734877824783325, "learning_rate": 4.434864332405085e-06, "loss": 0.1694, "step": 1521 }, { "epoch": 0.7196217494089835, "grad_norm": 5.21559476852417, "learning_rate": 4.421155914757817e-06, "loss": 0.2566, "step": 1522 }, { "epoch": 0.7200945626477542, "grad_norm": 5.495852470397949, "learning_rate": 4.407462701405791e-06, "loss": 0.2993, "step": 1523 }, { "epoch": 0.7205673758865249, "grad_norm": 4.870457649230957, "learning_rate": 4.393784729667788e-06, "loss": 0.2035, "step": 1524 }, { "epoch": 0.7210401891252955, "grad_norm": 10.591519355773926, "learning_rate": 4.380122036821048e-06, "loss": 0.4052, "step": 1525 }, { "epoch": 0.7215130023640662, "grad_norm": 5.435426712036133, "learning_rate": 4.366474660101183e-06, "loss": 0.2258, "step": 1526 }, { "epoch": 0.7219858156028369, "grad_norm": 4.526400089263916, "learning_rate": 4.3528426367020405e-06, "loss": 0.1775, "step": 1527 }, { "epoch": 0.7224586288416076, "grad_norm": 3.7634830474853516, "learning_rate": 4.339226003775642e-06, "loss": 0.2018, "step": 1528 }, { "epoch": 0.7229314420803783, "grad_norm": 5.57973051071167, "learning_rate": 4.325624798432059e-06, "loss": 0.2942, "step": 1529 }, { "epoch": 0.723404255319149, "grad_norm": 4.420356750488281, "learning_rate": 4.312039057739316e-06, "loss": 0.217, "step": 1530 }, { "epoch": 0.7238770685579197, "grad_norm": 6.249464511871338, "learning_rate": 4.298468818723298e-06, "loss": 0.2769, "step": 1531 }, { "epoch": 0.7243498817966904, "grad_norm": 4.786191463470459, "learning_rate": 4.284914118367637e-06, "loss": 0.2363, "step": 1532 }, { "epoch": 0.724822695035461, "grad_norm": 3.865898847579956, "learning_rate": 4.271374993613615e-06, "loss": 0.1605, "step": 1533 }, { "epoch": 0.7252955082742317, "grad_norm": 4.794460296630859, "learning_rate": 4.257851481360066e-06, "loss": 0.1329, "step": 1534 }, { "epoch": 0.7257683215130024, "grad_norm": 4.994689464569092, "learning_rate": 4.244343618463281e-06, "loss": 0.2508, "step": 1535 }, { "epoch": 0.7262411347517731, "grad_norm": 5.947475910186768, "learning_rate": 4.2308514417368974e-06, "loss": 0.1934, "step": 1536 }, { "epoch": 0.7267139479905438, "grad_norm": 5.171843528747559, "learning_rate": 4.2173749879517945e-06, "loss": 0.2216, "step": 1537 }, { "epoch": 0.7271867612293145, "grad_norm": 3.876723527908325, "learning_rate": 4.2039142938360086e-06, "loss": 0.1718, "step": 1538 }, { "epoch": 0.7276595744680852, "grad_norm": 4.720749855041504, "learning_rate": 4.190469396074622e-06, "loss": 0.1817, "step": 1539 }, { "epoch": 0.7281323877068558, "grad_norm": 4.733708381652832, "learning_rate": 4.177040331309678e-06, "loss": 0.1989, "step": 1540 }, { "epoch": 0.7281323877068558, "eval_accuracy": 0.8614190687361419, "eval_f1": 0.7016706443914081, "eval_loss": 0.3186676502227783, "eval_precision": 0.8855421686746988, "eval_recall": 0.5810276679841897, "eval_runtime": 48.0319, "eval_samples_per_second": 5.746, "eval_steps_per_second": 0.187, "step": 1540 }, { "epoch": 0.7286052009456265, "grad_norm": 4.963865756988525, "learning_rate": 4.163627136140054e-06, "loss": 0.1798, "step": 1541 }, { "epoch": 0.7290780141843972, "grad_norm": 5.173526763916016, "learning_rate": 4.150229847121384e-06, "loss": 0.2075, "step": 1542 }, { "epoch": 0.7295508274231679, "grad_norm": 4.9639387130737305, "learning_rate": 4.136848500765948e-06, "loss": 0.2293, "step": 1543 }, { "epoch": 0.7300236406619386, "grad_norm": 5.996505260467529, "learning_rate": 4.123483133542588e-06, "loss": 0.2557, "step": 1544 }, { "epoch": 0.7304964539007093, "grad_norm": 3.7886509895324707, "learning_rate": 4.110133781876587e-06, "loss": 0.1741, "step": 1545 }, { "epoch": 0.7309692671394799, "grad_norm": 9.337357521057129, "learning_rate": 4.0968004821495845e-06, "loss": 0.2775, "step": 1546 }, { "epoch": 0.7314420803782505, "grad_norm": 7.194756984710693, "learning_rate": 4.083483270699461e-06, "loss": 0.2572, "step": 1547 }, { "epoch": 0.7319148936170212, "grad_norm": 5.332988739013672, "learning_rate": 4.070182183820272e-06, "loss": 0.1859, "step": 1548 }, { "epoch": 0.7323877068557919, "grad_norm": 7.105772972106934, "learning_rate": 4.056897257762111e-06, "loss": 0.2279, "step": 1549 }, { "epoch": 0.7328605200945626, "grad_norm": 3.7316532135009766, "learning_rate": 4.043628528731036e-06, "loss": 0.1744, "step": 1550 }, { "epoch": 0.7333333333333333, "grad_norm": 5.632213592529297, "learning_rate": 4.030376032888959e-06, "loss": 0.1418, "step": 1551 }, { "epoch": 0.733806146572104, "grad_norm": 11.740776062011719, "learning_rate": 4.01713980635355e-06, "loss": 0.4084, "step": 1552 }, { "epoch": 0.7342789598108747, "grad_norm": 5.153548717498779, "learning_rate": 4.003919885198145e-06, "loss": 0.1908, "step": 1553 }, { "epoch": 0.7347517730496453, "grad_norm": 7.000768184661865, "learning_rate": 3.990716305451636e-06, "loss": 0.158, "step": 1554 }, { "epoch": 0.735224586288416, "grad_norm": 7.075311183929443, "learning_rate": 3.977529103098382e-06, "loss": 0.3135, "step": 1555 }, { "epoch": 0.7356973995271867, "grad_norm": 4.545357704162598, "learning_rate": 3.964358314078107e-06, "loss": 0.1733, "step": 1556 }, { "epoch": 0.7361702127659574, "grad_norm": 7.8234357833862305, "learning_rate": 3.951203974285805e-06, "loss": 0.2067, "step": 1557 }, { "epoch": 0.7366430260047281, "grad_norm": 7.197999000549316, "learning_rate": 3.938066119571634e-06, "loss": 0.1974, "step": 1558 }, { "epoch": 0.7371158392434988, "grad_norm": 4.149715900421143, "learning_rate": 3.9249447857408316e-06, "loss": 0.1895, "step": 1559 }, { "epoch": 0.7375886524822695, "grad_norm": 6.287510395050049, "learning_rate": 3.911840008553604e-06, "loss": 0.1749, "step": 1560 }, { "epoch": 0.7375886524822695, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7136150234741784, "eval_loss": 0.3169473111629486, "eval_precision": 0.8786127167630058, "eval_recall": 0.6007905138339921, "eval_runtime": 49.2192, "eval_samples_per_second": 5.608, "eval_steps_per_second": 0.183, "step": 1560 }, { "epoch": 0.7380614657210401, "grad_norm": 6.008514881134033, "learning_rate": 3.898751823725044e-06, "loss": 0.2882, "step": 1561 }, { "epoch": 0.7385342789598108, "grad_norm": 4.948297023773193, "learning_rate": 3.885680266925016e-06, "loss": 0.2198, "step": 1562 }, { "epoch": 0.7390070921985815, "grad_norm": 8.02566146850586, "learning_rate": 3.87262537377807e-06, "loss": 0.2223, "step": 1563 }, { "epoch": 0.7394799054373522, "grad_norm": 5.575436592102051, "learning_rate": 3.85958717986334e-06, "loss": 0.2097, "step": 1564 }, { "epoch": 0.7399527186761229, "grad_norm": 6.199014663696289, "learning_rate": 3.846565720714451e-06, "loss": 0.2203, "step": 1565 }, { "epoch": 0.7404255319148936, "grad_norm": 4.431324481964111, "learning_rate": 3.83356103181942e-06, "loss": 0.196, "step": 1566 }, { "epoch": 0.7408983451536643, "grad_norm": 8.445691108703613, "learning_rate": 3.820573148620559e-06, "loss": 0.278, "step": 1567 }, { "epoch": 0.741371158392435, "grad_norm": 7.028994083404541, "learning_rate": 3.807602106514375e-06, "loss": 0.2681, "step": 1568 }, { "epoch": 0.7418439716312056, "grad_norm": 4.2099995613098145, "learning_rate": 3.79464794085148e-06, "loss": 0.2066, "step": 1569 }, { "epoch": 0.7423167848699763, "grad_norm": 4.710866928100586, "learning_rate": 3.781710686936497e-06, "loss": 0.183, "step": 1570 }, { "epoch": 0.742789598108747, "grad_norm": 5.328328609466553, "learning_rate": 3.7687903800279513e-06, "loss": 0.1954, "step": 1571 }, { "epoch": 0.7432624113475177, "grad_norm": 7.036726474761963, "learning_rate": 3.755887055338183e-06, "loss": 0.2555, "step": 1572 }, { "epoch": 0.7437352245862884, "grad_norm": 6.250298500061035, "learning_rate": 3.743000748033252e-06, "loss": 0.2065, "step": 1573 }, { "epoch": 0.7442080378250591, "grad_norm": 6.400665760040283, "learning_rate": 3.730131493232837e-06, "loss": 0.2693, "step": 1574 }, { "epoch": 0.7446808510638298, "grad_norm": 5.254453659057617, "learning_rate": 3.7172793260101446e-06, "loss": 0.1433, "step": 1575 }, { "epoch": 0.7451536643026004, "grad_norm": 7.966073989868164, "learning_rate": 3.7044442813918125e-06, "loss": 0.2912, "step": 1576 }, { "epoch": 0.7456264775413711, "grad_norm": 7.94743537902832, "learning_rate": 3.6916263943578123e-06, "loss": 0.1966, "step": 1577 }, { "epoch": 0.7460992907801418, "grad_norm": 6.912722110748291, "learning_rate": 3.6788256998413506e-06, "loss": 0.1794, "step": 1578 }, { "epoch": 0.7465721040189125, "grad_norm": 6.084993839263916, "learning_rate": 3.6660422327287914e-06, "loss": 0.1606, "step": 1579 }, { "epoch": 0.7470449172576832, "grad_norm": 5.905043125152588, "learning_rate": 3.6532760278595345e-06, "loss": 0.2141, "step": 1580 }, { "epoch": 0.7470449172576832, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7272727272727273, "eval_loss": 0.3045748174190521, "eval_precision": 0.8556149732620321, "eval_recall": 0.6324110671936759, "eval_runtime": 46.807, "eval_samples_per_second": 5.897, "eval_steps_per_second": 0.192, "step": 1580 }, { "epoch": 0.7475177304964539, "grad_norm": 6.382122039794922, "learning_rate": 3.6405271200259406e-06, "loss": 0.1741, "step": 1581 }, { "epoch": 0.7479905437352246, "grad_norm": 7.32953405380249, "learning_rate": 3.627795543973228e-06, "loss": 0.3185, "step": 1582 }, { "epoch": 0.7484633569739952, "grad_norm": 5.959390163421631, "learning_rate": 3.6150813343993817e-06, "loss": 0.244, "step": 1583 }, { "epoch": 0.7489361702127659, "grad_norm": 7.206792831420898, "learning_rate": 3.6023845259550526e-06, "loss": 0.2935, "step": 1584 }, { "epoch": 0.7494089834515366, "grad_norm": 6.780135154724121, "learning_rate": 3.5897051532434746e-06, "loss": 0.2841, "step": 1585 }, { "epoch": 0.7498817966903073, "grad_norm": 5.990156173706055, "learning_rate": 3.5770432508203525e-06, "loss": 0.2146, "step": 1586 }, { "epoch": 0.750354609929078, "grad_norm": 6.680501937866211, "learning_rate": 3.5643988531937923e-06, "loss": 0.2593, "step": 1587 }, { "epoch": 0.7508274231678487, "grad_norm": 6.1346282958984375, "learning_rate": 3.5517719948241837e-06, "loss": 0.2365, "step": 1588 }, { "epoch": 0.7513002364066194, "grad_norm": 6.420486927032471, "learning_rate": 3.5391627101241187e-06, "loss": 0.2646, "step": 1589 }, { "epoch": 0.75177304964539, "grad_norm": 5.62802791595459, "learning_rate": 3.5265710334582924e-06, "loss": 0.2584, "step": 1590 }, { "epoch": 0.7522458628841607, "grad_norm": 5.959242820739746, "learning_rate": 3.5139969991434132e-06, "loss": 0.1629, "step": 1591 }, { "epoch": 0.7527186761229314, "grad_norm": 7.4486517906188965, "learning_rate": 3.5014406414481173e-06, "loss": 0.3043, "step": 1592 }, { "epoch": 0.7531914893617021, "grad_norm": 11.137663841247559, "learning_rate": 3.488901994592846e-06, "loss": 0.2216, "step": 1593 }, { "epoch": 0.7536643026004728, "grad_norm": 7.19482946395874, "learning_rate": 3.476381092749789e-06, "loss": 0.1895, "step": 1594 }, { "epoch": 0.7541371158392435, "grad_norm": 5.478968620300293, "learning_rate": 3.463877970042765e-06, "loss": 0.2568, "step": 1595 }, { "epoch": 0.7546099290780142, "grad_norm": 5.759098052978516, "learning_rate": 3.4513926605471504e-06, "loss": 0.2078, "step": 1596 }, { "epoch": 0.7550827423167848, "grad_norm": 5.912849426269531, "learning_rate": 3.438925198289762e-06, "loss": 0.2184, "step": 1597 }, { "epoch": 0.7555555555555555, "grad_norm": 7.000945091247559, "learning_rate": 3.4264756172487813e-06, "loss": 0.2958, "step": 1598 }, { "epoch": 0.7560283687943262, "grad_norm": 6.138962745666504, "learning_rate": 3.414043951353656e-06, "loss": 0.3196, "step": 1599 }, { "epoch": 0.7565011820330969, "grad_norm": 6.125826835632324, "learning_rate": 3.401630234485014e-06, "loss": 0.2638, "step": 1600 }, { "epoch": 0.7565011820330969, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7309417040358744, "eval_loss": 0.29756960272789, "eval_precision": 0.844559585492228, "eval_recall": 0.6442687747035574, "eval_runtime": 47.3187, "eval_samples_per_second": 5.833, "eval_steps_per_second": 0.19, "step": 1600 }, { "epoch": 0.7569739952718676, "grad_norm": 6.0788445472717285, "learning_rate": 3.3892345004745607e-06, "loss": 0.1994, "step": 1601 }, { "epoch": 0.7574468085106383, "grad_norm": 6.959369659423828, "learning_rate": 3.376856783104996e-06, "loss": 0.3052, "step": 1602 }, { "epoch": 0.757919621749409, "grad_norm": 4.413602352142334, "learning_rate": 3.3644971161099083e-06, "loss": 0.1861, "step": 1603 }, { "epoch": 0.7583924349881797, "grad_norm": 4.487978935241699, "learning_rate": 3.3521555331736987e-06, "loss": 0.2593, "step": 1604 }, { "epoch": 0.7588652482269503, "grad_norm": 5.322134017944336, "learning_rate": 3.339832067931491e-06, "loss": 0.2151, "step": 1605 }, { "epoch": 0.759338061465721, "grad_norm": 5.43377685546875, "learning_rate": 3.3275267539690225e-06, "loss": 0.2738, "step": 1606 }, { "epoch": 0.7598108747044917, "grad_norm": 5.074190616607666, "learning_rate": 3.315239624822563e-06, "loss": 0.1439, "step": 1607 }, { "epoch": 0.7602836879432624, "grad_norm": 5.0913920402526855, "learning_rate": 3.30297071397882e-06, "loss": 0.2314, "step": 1608 }, { "epoch": 0.7607565011820331, "grad_norm": 6.6666107177734375, "learning_rate": 3.29072005487486e-06, "loss": 0.3486, "step": 1609 }, { "epoch": 0.7612293144208038, "grad_norm": 5.526213645935059, "learning_rate": 3.278487680897997e-06, "loss": 0.2517, "step": 1610 }, { "epoch": 0.7617021276595745, "grad_norm": 5.9422736167907715, "learning_rate": 3.2662736253857154e-06, "loss": 0.219, "step": 1611 }, { "epoch": 0.7621749408983451, "grad_norm": 5.5532426834106445, "learning_rate": 3.254077921625578e-06, "loss": 0.2077, "step": 1612 }, { "epoch": 0.7626477541371158, "grad_norm": 4.330904960632324, "learning_rate": 3.2419006028551205e-06, "loss": 0.1412, "step": 1613 }, { "epoch": 0.7631205673758865, "grad_norm": 8.491339683532715, "learning_rate": 3.2297417022617904e-06, "loss": 0.3303, "step": 1614 }, { "epoch": 0.7635933806146572, "grad_norm": 6.322214603424072, "learning_rate": 3.2176012529828295e-06, "loss": 0.2718, "step": 1615 }, { "epoch": 0.7640661938534279, "grad_norm": 6.424625873565674, "learning_rate": 3.2054792881051933e-06, "loss": 0.2817, "step": 1616 }, { "epoch": 0.7645390070921986, "grad_norm": 5.700141429901123, "learning_rate": 3.1933758406654615e-06, "loss": 0.273, "step": 1617 }, { "epoch": 0.7650118203309693, "grad_norm": 4.833881378173828, "learning_rate": 3.181290943649753e-06, "loss": 0.2003, "step": 1618 }, { "epoch": 0.76548463356974, "grad_norm": 5.849541187286377, "learning_rate": 3.1692246299936234e-06, "loss": 0.2389, "step": 1619 }, { "epoch": 0.7659574468085106, "grad_norm": 4.621654510498047, "learning_rate": 3.1571769325819834e-06, "loss": 0.2215, "step": 1620 }, { "epoch": 0.7659574468085106, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7276785714285714, "eval_loss": 0.2926580309867859, "eval_precision": 0.8358974358974359, "eval_recall": 0.6442687747035574, "eval_runtime": 48.4804, "eval_samples_per_second": 5.693, "eval_steps_per_second": 0.186, "step": 1620 }, { "epoch": 0.7664302600472813, "grad_norm": 4.861423492431641, "learning_rate": 3.1451478842490114e-06, "loss": 0.2547, "step": 1621 }, { "epoch": 0.766903073286052, "grad_norm": 5.893204212188721, "learning_rate": 3.133137517778054e-06, "loss": 0.1872, "step": 1622 }, { "epoch": 0.7673758865248227, "grad_norm": 5.206727504730225, "learning_rate": 3.1211458659015513e-06, "loss": 0.198, "step": 1623 }, { "epoch": 0.7678486997635934, "grad_norm": 5.422547340393066, "learning_rate": 3.1091729613009346e-06, "loss": 0.25, "step": 1624 }, { "epoch": 0.7683215130023641, "grad_norm": 6.1905999183654785, "learning_rate": 3.0972188366065424e-06, "loss": 0.2626, "step": 1625 }, { "epoch": 0.7687943262411348, "grad_norm": 4.419033050537109, "learning_rate": 3.08528352439753e-06, "loss": 0.2188, "step": 1626 }, { "epoch": 0.7692671394799054, "grad_norm": 4.382445335388184, "learning_rate": 3.0733670572017894e-06, "loss": 0.1589, "step": 1627 }, { "epoch": 0.7697399527186761, "grad_norm": 4.622806549072266, "learning_rate": 3.0614694674958477e-06, "loss": 0.2515, "step": 1628 }, { "epoch": 0.7702127659574468, "grad_norm": 6.51718807220459, "learning_rate": 3.0495907877047836e-06, "loss": 0.2507, "step": 1629 }, { "epoch": 0.7706855791962175, "grad_norm": 3.415192127227783, "learning_rate": 3.0377310502021405e-06, "loss": 0.1726, "step": 1630 }, { "epoch": 0.7711583924349882, "grad_norm": 4.835279941558838, "learning_rate": 3.0258902873098406e-06, "loss": 0.1817, "step": 1631 }, { "epoch": 0.7716312056737589, "grad_norm": 6.565097808837891, "learning_rate": 3.014068531298089e-06, "loss": 0.2459, "step": 1632 }, { "epoch": 0.7721040189125296, "grad_norm": 6.267763614654541, "learning_rate": 3.0022658143852923e-06, "loss": 0.2536, "step": 1633 }, { "epoch": 0.7725768321513002, "grad_norm": 4.700669765472412, "learning_rate": 2.990482168737967e-06, "loss": 0.161, "step": 1634 }, { "epoch": 0.7730496453900709, "grad_norm": 6.653107166290283, "learning_rate": 2.978717626470663e-06, "loss": 0.2972, "step": 1635 }, { "epoch": 0.7735224586288416, "grad_norm": 5.026117324829102, "learning_rate": 2.966972219645855e-06, "loss": 0.237, "step": 1636 }, { "epoch": 0.7739952718676123, "grad_norm": 6.812114238739014, "learning_rate": 2.9552459802738733e-06, "loss": 0.1928, "step": 1637 }, { "epoch": 0.774468085106383, "grad_norm": 5.485900402069092, "learning_rate": 2.943538940312807e-06, "loss": 0.1795, "step": 1638 }, { "epoch": 0.7749408983451537, "grad_norm": 4.413486003875732, "learning_rate": 2.931851131668423e-06, "loss": 0.2056, "step": 1639 }, { "epoch": 0.7754137115839244, "grad_norm": 6.196752071380615, "learning_rate": 2.920182586194075e-06, "loss": 0.2587, "step": 1640 }, { "epoch": 0.7754137115839244, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7067307692307693, "eval_loss": 0.31787875294685364, "eval_precision": 0.901840490797546, "eval_recall": 0.5810276679841897, "eval_runtime": 48.6346, "eval_samples_per_second": 5.675, "eval_steps_per_second": 0.185, "step": 1640 }, { "epoch": 0.775886524822695, "grad_norm": 4.845483303070068, "learning_rate": 2.9085333356906165e-06, "loss": 0.1588, "step": 1641 }, { "epoch": 0.7763593380614657, "grad_norm": 5.5834479331970215, "learning_rate": 2.8969034119063176e-06, "loss": 0.2241, "step": 1642 }, { "epoch": 0.7768321513002364, "grad_norm": 5.414841175079346, "learning_rate": 2.8852928465367726e-06, "loss": 0.2914, "step": 1643 }, { "epoch": 0.7773049645390071, "grad_norm": 6.023651599884033, "learning_rate": 2.8737016712248258e-06, "loss": 0.2307, "step": 1644 }, { "epoch": 0.7777777777777778, "grad_norm": 4.881513595581055, "learning_rate": 2.862129917560469e-06, "loss": 0.1618, "step": 1645 }, { "epoch": 0.7782505910165485, "grad_norm": 5.7092814445495605, "learning_rate": 2.850577617080764e-06, "loss": 0.2415, "step": 1646 }, { "epoch": 0.7787234042553192, "grad_norm": 6.310904026031494, "learning_rate": 2.839044801269756e-06, "loss": 0.2487, "step": 1647 }, { "epoch": 0.7791962174940898, "grad_norm": 8.262921333312988, "learning_rate": 2.827531501558395e-06, "loss": 0.2799, "step": 1648 }, { "epoch": 0.7796690307328605, "grad_norm": 6.071582317352295, "learning_rate": 2.8160377493244363e-06, "loss": 0.2469, "step": 1649 }, { "epoch": 0.7801418439716312, "grad_norm": 4.781665802001953, "learning_rate": 2.8045635758923563e-06, "loss": 0.169, "step": 1650 }, { "epoch": 0.7806146572104019, "grad_norm": 4.784432411193848, "learning_rate": 2.7931090125332806e-06, "loss": 0.2056, "step": 1651 }, { "epoch": 0.7810874704491726, "grad_norm": 5.871290683746338, "learning_rate": 2.7816740904648866e-06, "loss": 0.2034, "step": 1652 }, { "epoch": 0.7815602836879433, "grad_norm": 7.995057106018066, "learning_rate": 2.7702588408513276e-06, "loss": 0.3481, "step": 1653 }, { "epoch": 0.782033096926714, "grad_norm": 4.378397464752197, "learning_rate": 2.758863294803138e-06, "loss": 0.182, "step": 1654 }, { "epoch": 0.7825059101654847, "grad_norm": 6.422306060791016, "learning_rate": 2.7474874833771524e-06, "loss": 0.2954, "step": 1655 }, { "epoch": 0.7829787234042553, "grad_norm": 4.291572570800781, "learning_rate": 2.7361314375764215e-06, "loss": 0.1982, "step": 1656 }, { "epoch": 0.783451536643026, "grad_norm": 4.588647365570068, "learning_rate": 2.7247951883501343e-06, "loss": 0.1613, "step": 1657 }, { "epoch": 0.7839243498817967, "grad_norm": 5.927759647369385, "learning_rate": 2.7134787665935213e-06, "loss": 0.3002, "step": 1658 }, { "epoch": 0.7843971631205674, "grad_norm": 6.183173656463623, "learning_rate": 2.7021822031477773e-06, "loss": 0.2178, "step": 1659 }, { "epoch": 0.7848699763593381, "grad_norm": 6.231297492980957, "learning_rate": 2.6909055287999698e-06, "loss": 0.2216, "step": 1660 }, { "epoch": 0.7848699763593381, "eval_accuracy": 0.8713968957871396, "eval_f1": 0.7289719626168224, "eval_loss": 0.3045533299446106, "eval_precision": 0.8914285714285715, "eval_recall": 0.616600790513834, "eval_runtime": 49.0889, "eval_samples_per_second": 5.622, "eval_steps_per_second": 0.183, "step": 1660 }, { "epoch": 0.7853427895981088, "grad_norm": 6.300743103027344, "learning_rate": 2.6796487742829758e-06, "loss": 0.2452, "step": 1661 }, { "epoch": 0.7858156028368795, "grad_norm": 5.218397617340088, "learning_rate": 2.668411970275374e-06, "loss": 0.2879, "step": 1662 }, { "epoch": 0.7862884160756501, "grad_norm": 5.584065914154053, "learning_rate": 2.6571951474013734e-06, "loss": 0.2256, "step": 1663 }, { "epoch": 0.7867612293144208, "grad_norm": 6.801513671875, "learning_rate": 2.6459983362307263e-06, "loss": 0.2637, "step": 1664 }, { "epoch": 0.7872340425531915, "grad_norm": 3.2993884086608887, "learning_rate": 2.6348215672786435e-06, "loss": 0.1469, "step": 1665 }, { "epoch": 0.7877068557919622, "grad_norm": 5.288236141204834, "learning_rate": 2.6236648710057244e-06, "loss": 0.1577, "step": 1666 }, { "epoch": 0.7881796690307329, "grad_norm": 4.468508243560791, "learning_rate": 2.612528277817853e-06, "loss": 0.188, "step": 1667 }, { "epoch": 0.7886524822695036, "grad_norm": 5.90925407409668, "learning_rate": 2.6014118180661284e-06, "loss": 0.229, "step": 1668 }, { "epoch": 0.7891252955082743, "grad_norm": 5.6658124923706055, "learning_rate": 2.590315522046779e-06, "loss": 0.2122, "step": 1669 }, { "epoch": 0.789598108747045, "grad_norm": 6.5475921630859375, "learning_rate": 2.5792394200010805e-06, "loss": 0.2117, "step": 1670 }, { "epoch": 0.7900709219858156, "grad_norm": 5.4423112869262695, "learning_rate": 2.5681835421152736e-06, "loss": 0.2646, "step": 1671 }, { "epoch": 0.7905437352245863, "grad_norm": 4.928060531616211, "learning_rate": 2.5571479185204785e-06, "loss": 0.2074, "step": 1672 }, { "epoch": 0.791016548463357, "grad_norm": 5.3929243087768555, "learning_rate": 2.546132579292616e-06, "loss": 0.2094, "step": 1673 }, { "epoch": 0.7914893617021277, "grad_norm": 6.514987468719482, "learning_rate": 2.5351375544523306e-06, "loss": 0.2043, "step": 1674 }, { "epoch": 0.7919621749408984, "grad_norm": 4.381026268005371, "learning_rate": 2.524162873964896e-06, "loss": 0.2086, "step": 1675 }, { "epoch": 0.7924349881796691, "grad_norm": 5.139743328094482, "learning_rate": 2.513208567740144e-06, "loss": 0.1823, "step": 1676 }, { "epoch": 0.7929078014184398, "grad_norm": 4.477554798126221, "learning_rate": 2.502274665632377e-06, "loss": 0.1828, "step": 1677 }, { "epoch": 0.7933806146572104, "grad_norm": 7.049522399902344, "learning_rate": 2.491361197440291e-06, "loss": 0.2235, "step": 1678 }, { "epoch": 0.7938534278959811, "grad_norm": 5.63670539855957, "learning_rate": 2.4804681929068907e-06, "loss": 0.2404, "step": 1679 }, { "epoch": 0.7943262411347518, "grad_norm": 5.916224956512451, "learning_rate": 2.4695956817194134e-06, "loss": 0.2357, "step": 1680 }, { "epoch": 0.7943262411347518, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7272727272727273, "eval_loss": 0.2966913878917694, "eval_precision": 0.8556149732620321, "eval_recall": 0.6324110671936759, "eval_runtime": 48.6495, "eval_samples_per_second": 5.673, "eval_steps_per_second": 0.185, "step": 1680 }, { "epoch": 0.7947990543735225, "grad_norm": 6.325137138366699, "learning_rate": 2.4587436935092424e-06, "loss": 0.2087, "step": 1681 }, { "epoch": 0.7952718676122932, "grad_norm": 4.733521461486816, "learning_rate": 2.4479122578518257e-06, "loss": 0.2256, "step": 1682 }, { "epoch": 0.7957446808510639, "grad_norm": 4.68524169921875, "learning_rate": 2.4371014042666074e-06, "loss": 0.2188, "step": 1683 }, { "epoch": 0.7962174940898346, "grad_norm": 6.98213529586792, "learning_rate": 2.42631116221693e-06, "loss": 0.2244, "step": 1684 }, { "epoch": 0.7966903073286052, "grad_norm": 6.548198223114014, "learning_rate": 2.4155415611099664e-06, "loss": 0.2656, "step": 1685 }, { "epoch": 0.7971631205673759, "grad_norm": 5.059558391571045, "learning_rate": 2.404792630296633e-06, "loss": 0.1769, "step": 1686 }, { "epoch": 0.7976359338061466, "grad_norm": 6.935822010040283, "learning_rate": 2.394064399071515e-06, "loss": 0.2474, "step": 1687 }, { "epoch": 0.7981087470449173, "grad_norm": 6.197619438171387, "learning_rate": 2.3833568966727837e-06, "loss": 0.2132, "step": 1688 }, { "epoch": 0.798581560283688, "grad_norm": 6.507517337799072, "learning_rate": 2.372670152282114e-06, "loss": 0.2625, "step": 1689 }, { "epoch": 0.7990543735224587, "grad_norm": 8.216846466064453, "learning_rate": 2.362004195024613e-06, "loss": 0.2823, "step": 1690 }, { "epoch": 0.7995271867612294, "grad_norm": 5.132957935333252, "learning_rate": 2.351359053968728e-06, "loss": 0.1989, "step": 1691 }, { "epoch": 0.8, "grad_norm": 6.100037574768066, "learning_rate": 2.3407347581261863e-06, "loss": 0.2593, "step": 1692 }, { "epoch": 0.8004728132387707, "grad_norm": 8.391918182373047, "learning_rate": 2.3301313364518964e-06, "loss": 0.3208, "step": 1693 }, { "epoch": 0.8009456264775414, "grad_norm": 4.401480674743652, "learning_rate": 2.3195488178438785e-06, "loss": 0.1518, "step": 1694 }, { "epoch": 0.8014184397163121, "grad_norm": 6.447848796844482, "learning_rate": 2.308987231143186e-06, "loss": 0.2173, "step": 1695 }, { "epoch": 0.8018912529550828, "grad_norm": 6.483435153961182, "learning_rate": 2.298446605133824e-06, "loss": 0.1744, "step": 1696 }, { "epoch": 0.8023640661938535, "grad_norm": 5.87816858291626, "learning_rate": 2.2879269685426742e-06, "loss": 0.1838, "step": 1697 }, { "epoch": 0.8028368794326242, "grad_norm": 8.297409057617188, "learning_rate": 2.2774283500394134e-06, "loss": 0.2732, "step": 1698 }, { "epoch": 0.8033096926713948, "grad_norm": 9.192248344421387, "learning_rate": 2.2669507782364387e-06, "loss": 0.3547, "step": 1699 }, { "epoch": 0.8037825059101655, "grad_norm": 5.559706211090088, "learning_rate": 2.2564942816887837e-06, "loss": 0.1972, "step": 1700 }, { "epoch": 0.8037825059101655, "eval_accuracy": 0.8658536585365854, "eval_f1": 0.7218390804597701, "eval_loss": 0.30020540952682495, "eval_precision": 0.8626373626373627, "eval_recall": 0.6205533596837944, "eval_runtime": 48.6466, "eval_samples_per_second": 5.674, "eval_steps_per_second": 0.185, "step": 1700 }, { "epoch": 0.8042553191489362, "grad_norm": 5.664395332336426, "learning_rate": 2.2460588888940504e-06, "loss": 0.2274, "step": 1701 }, { "epoch": 0.8047281323877069, "grad_norm": 5.178661346435547, "learning_rate": 2.235644628292323e-06, "loss": 0.2305, "step": 1702 }, { "epoch": 0.8052009456264776, "grad_norm": 6.627544403076172, "learning_rate": 2.225251528266089e-06, "loss": 0.2816, "step": 1703 }, { "epoch": 0.8056737588652483, "grad_norm": 5.081453323364258, "learning_rate": 2.214879617140171e-06, "loss": 0.1905, "step": 1704 }, { "epoch": 0.806146572104019, "grad_norm": 6.601840496063232, "learning_rate": 2.204528923181648e-06, "loss": 0.2067, "step": 1705 }, { "epoch": 0.8066193853427897, "grad_norm": 5.457771301269531, "learning_rate": 2.194199474599763e-06, "loss": 0.2434, "step": 1706 }, { "epoch": 0.8070921985815603, "grad_norm": 6.326608657836914, "learning_rate": 2.1838912995458673e-06, "loss": 0.2722, "step": 1707 }, { "epoch": 0.807565011820331, "grad_norm": 5.285124778747559, "learning_rate": 2.1736044261133305e-06, "loss": 0.2349, "step": 1708 }, { "epoch": 0.8080378250591016, "grad_norm": 7.561131477355957, "learning_rate": 2.1633388823374722e-06, "loss": 0.2804, "step": 1709 }, { "epoch": 0.8085106382978723, "grad_norm": 5.1685309410095215, "learning_rate": 2.153094696195478e-06, "loss": 0.179, "step": 1710 }, { "epoch": 0.808983451536643, "grad_norm": 5.682158470153809, "learning_rate": 2.1428718956063253e-06, "loss": 0.2478, "step": 1711 }, { "epoch": 0.8094562647754137, "grad_norm": 6.980831146240234, "learning_rate": 2.132670508430711e-06, "loss": 0.1889, "step": 1712 }, { "epoch": 0.8099290780141843, "grad_norm": 4.640564441680908, "learning_rate": 2.1224905624709692e-06, "loss": 0.1338, "step": 1713 }, { "epoch": 0.810401891252955, "grad_norm": 5.731657981872559, "learning_rate": 2.112332085471006e-06, "loss": 0.2535, "step": 1714 }, { "epoch": 0.8108747044917257, "grad_norm": 5.084789276123047, "learning_rate": 2.102195105116215e-06, "loss": 0.271, "step": 1715 }, { "epoch": 0.8113475177304964, "grad_norm": 5.400364875793457, "learning_rate": 2.092079649033395e-06, "loss": 0.1942, "step": 1716 }, { "epoch": 0.8118203309692671, "grad_norm": 4.337155818939209, "learning_rate": 2.081985744790691e-06, "loss": 0.1472, "step": 1717 }, { "epoch": 0.8122931442080378, "grad_norm": 4.337014675140381, "learning_rate": 2.0719134198975187e-06, "loss": 0.1988, "step": 1718 }, { "epoch": 0.8127659574468085, "grad_norm": 4.499454498291016, "learning_rate": 2.06186270180447e-06, "loss": 0.1955, "step": 1719 }, { "epoch": 0.8132387706855791, "grad_norm": 6.413626670837402, "learning_rate": 2.051833617903257e-06, "loss": 0.2602, "step": 1720 }, { "epoch": 0.8132387706855791, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7209302325581395, "eval_loss": 0.30013346672058105, "eval_precision": 0.8757062146892656, "eval_recall": 0.6126482213438735, "eval_runtime": 48.9633, "eval_samples_per_second": 5.637, "eval_steps_per_second": 0.184, "step": 1720 }, { "epoch": 0.8137115839243498, "grad_norm": 6.098957538604736, "learning_rate": 2.041826195526627e-06, "loss": 0.3015, "step": 1721 }, { "epoch": 0.8141843971631205, "grad_norm": 6.596921443939209, "learning_rate": 2.031840461948301e-06, "loss": 0.3017, "step": 1722 }, { "epoch": 0.8146572104018912, "grad_norm": 6.857213497161865, "learning_rate": 2.021876444382882e-06, "loss": 0.2186, "step": 1723 }, { "epoch": 0.8151300236406619, "grad_norm": 5.775735378265381, "learning_rate": 2.011934169985792e-06, "loss": 0.2594, "step": 1724 }, { "epoch": 0.8156028368794326, "grad_norm": 6.679286956787109, "learning_rate": 2.0020136658531964e-06, "loss": 0.3236, "step": 1725 }, { "epoch": 0.8160756501182033, "grad_norm": 5.931440353393555, "learning_rate": 1.9921149590219213e-06, "loss": 0.2564, "step": 1726 }, { "epoch": 0.816548463356974, "grad_norm": 4.7152581214904785, "learning_rate": 1.9822380764694027e-06, "loss": 0.1825, "step": 1727 }, { "epoch": 0.8170212765957446, "grad_norm": 5.601851940155029, "learning_rate": 1.972383045113585e-06, "loss": 0.2052, "step": 1728 }, { "epoch": 0.8174940898345153, "grad_norm": 5.7181501388549805, "learning_rate": 1.962549891812865e-06, "loss": 0.2467, "step": 1729 }, { "epoch": 0.817966903073286, "grad_norm": 11.021007537841797, "learning_rate": 1.952738643366011e-06, "loss": 0.3405, "step": 1730 }, { "epoch": 0.8184397163120567, "grad_norm": 5.372162342071533, "learning_rate": 1.9429493265121026e-06, "loss": 0.2504, "step": 1731 }, { "epoch": 0.8189125295508274, "grad_norm": 4.431525707244873, "learning_rate": 1.9331819679304376e-06, "loss": 0.2204, "step": 1732 }, { "epoch": 0.8193853427895981, "grad_norm": 4.9457268714904785, "learning_rate": 1.923436594240473e-06, "loss": 0.1216, "step": 1733 }, { "epoch": 0.8198581560283688, "grad_norm": 6.442474365234375, "learning_rate": 1.9137132320017505e-06, "loss": 0.2644, "step": 1734 }, { "epoch": 0.8203309692671394, "grad_norm": 4.034286975860596, "learning_rate": 1.904011907713823e-06, "loss": 0.1679, "step": 1735 }, { "epoch": 0.8208037825059101, "grad_norm": 5.20193338394165, "learning_rate": 1.8943326478161806e-06, "loss": 0.1667, "step": 1736 }, { "epoch": 0.8212765957446808, "grad_norm": 5.608093738555908, "learning_rate": 1.8846754786881816e-06, "loss": 0.2191, "step": 1737 }, { "epoch": 0.8217494089834515, "grad_norm": 4.049744606018066, "learning_rate": 1.8750404266489796e-06, "loss": 0.186, "step": 1738 }, { "epoch": 0.8222222222222222, "grad_norm": 5.3404436111450195, "learning_rate": 1.8654275179574477e-06, "loss": 0.2918, "step": 1739 }, { "epoch": 0.8226950354609929, "grad_norm": 6.187091827392578, "learning_rate": 1.855836778812118e-06, "loss": 0.1873, "step": 1740 }, { "epoch": 0.8226950354609929, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7136150234741784, "eval_loss": 0.30463746190071106, "eval_precision": 0.8786127167630058, "eval_recall": 0.6007905138339921, "eval_runtime": 49.7049, "eval_samples_per_second": 5.553, "eval_steps_per_second": 0.181, "step": 1740 }, { "epoch": 0.8231678486997636, "grad_norm": 5.610990524291992, "learning_rate": 1.8462682353510974e-06, "loss": 0.258, "step": 1741 }, { "epoch": 0.8236406619385342, "grad_norm": 7.3444318771362305, "learning_rate": 1.836721913652002e-06, "loss": 0.2804, "step": 1742 }, { "epoch": 0.8241134751773049, "grad_norm": 5.178664684295654, "learning_rate": 1.8271978397318868e-06, "loss": 0.2232, "step": 1743 }, { "epoch": 0.8245862884160756, "grad_norm": 7.083937168121338, "learning_rate": 1.8176960395471754e-06, "loss": 0.306, "step": 1744 }, { "epoch": 0.8250591016548463, "grad_norm": 5.501087188720703, "learning_rate": 1.8082165389935836e-06, "loss": 0.2164, "step": 1745 }, { "epoch": 0.825531914893617, "grad_norm": 6.170442581176758, "learning_rate": 1.7987593639060586e-06, "loss": 0.2403, "step": 1746 }, { "epoch": 0.8260047281323877, "grad_norm": 6.09285306930542, "learning_rate": 1.7893245400586967e-06, "loss": 0.2852, "step": 1747 }, { "epoch": 0.8264775413711584, "grad_norm": 5.350312232971191, "learning_rate": 1.7799120931646819e-06, "loss": 0.251, "step": 1748 }, { "epoch": 0.826950354609929, "grad_norm": 5.884124755859375, "learning_rate": 1.7705220488762187e-06, "loss": 0.2269, "step": 1749 }, { "epoch": 0.8274231678486997, "grad_norm": 5.945254325866699, "learning_rate": 1.7611544327844487e-06, "loss": 0.206, "step": 1750 }, { "epoch": 0.8278959810874704, "grad_norm": 4.885594844818115, "learning_rate": 1.7518092704193913e-06, "loss": 0.2674, "step": 1751 }, { "epoch": 0.8283687943262411, "grad_norm": 5.776851654052734, "learning_rate": 1.742486587249873e-06, "loss": 0.2314, "step": 1752 }, { "epoch": 0.8288416075650118, "grad_norm": 4.074375629425049, "learning_rate": 1.733186408683456e-06, "loss": 0.1666, "step": 1753 }, { "epoch": 0.8293144208037825, "grad_norm": 3.9429306983947754, "learning_rate": 1.7239087600663684e-06, "loss": 0.2021, "step": 1754 }, { "epoch": 0.8297872340425532, "grad_norm": 4.927017688751221, "learning_rate": 1.714653666683439e-06, "loss": 0.2131, "step": 1755 }, { "epoch": 0.8302600472813239, "grad_norm": 4.356184959411621, "learning_rate": 1.7054211537580201e-06, "loss": 0.1633, "step": 1756 }, { "epoch": 0.8307328605200945, "grad_norm": 6.772974967956543, "learning_rate": 1.6962112464519343e-06, "loss": 0.2083, "step": 1757 }, { "epoch": 0.8312056737588652, "grad_norm": 5.196053504943848, "learning_rate": 1.6870239698653879e-06, "loss": 0.2203, "step": 1758 }, { "epoch": 0.8316784869976359, "grad_norm": 5.340625762939453, "learning_rate": 1.677859349036911e-06, "loss": 0.2047, "step": 1759 }, { "epoch": 0.8321513002364066, "grad_norm": 3.65738582611084, "learning_rate": 1.6687174089432934e-06, "loss": 0.1663, "step": 1760 }, { "epoch": 0.8321513002364066, "eval_accuracy": 0.8658536585365854, "eval_f1": 0.717948717948718, "eval_loss": 0.2977656424045563, "eval_precision": 0.875, "eval_recall": 0.6086956521739131, "eval_runtime": 47.9133, "eval_samples_per_second": 5.76, "eval_steps_per_second": 0.188, "step": 1760 }, { "epoch": 0.8326241134751773, "grad_norm": 5.490943431854248, "learning_rate": 1.659598174499505e-06, "loss": 0.2443, "step": 1761 }, { "epoch": 0.833096926713948, "grad_norm": 5.882487773895264, "learning_rate": 1.6505016705586475e-06, "loss": 0.2925, "step": 1762 }, { "epoch": 0.8335697399527187, "grad_norm": 4.92294979095459, "learning_rate": 1.6414279219118568e-06, "loss": 0.1559, "step": 1763 }, { "epoch": 0.8340425531914893, "grad_norm": 8.455697059631348, "learning_rate": 1.632376953288265e-06, "loss": 0.3205, "step": 1764 }, { "epoch": 0.83451536643026, "grad_norm": 5.372105598449707, "learning_rate": 1.623348789354916e-06, "loss": 0.2579, "step": 1765 }, { "epoch": 0.8349881796690307, "grad_norm": 6.4145307540893555, "learning_rate": 1.614343454716707e-06, "loss": 0.2467, "step": 1766 }, { "epoch": 0.8354609929078014, "grad_norm": 5.563887119293213, "learning_rate": 1.6053609739163134e-06, "loss": 0.192, "step": 1767 }, { "epoch": 0.8359338061465721, "grad_norm": 4.483576774597168, "learning_rate": 1.5964013714341275e-06, "loss": 0.1964, "step": 1768 }, { "epoch": 0.8364066193853428, "grad_norm": 6.770689010620117, "learning_rate": 1.587464671688187e-06, "loss": 0.2926, "step": 1769 }, { "epoch": 0.8368794326241135, "grad_norm": 3.7878456115722656, "learning_rate": 1.5785508990341192e-06, "loss": 0.1907, "step": 1770 }, { "epoch": 0.8373522458628841, "grad_norm": 7.301677227020264, "learning_rate": 1.5696600777650606e-06, "loss": 0.2305, "step": 1771 }, { "epoch": 0.8378250591016548, "grad_norm": 3.8965413570404053, "learning_rate": 1.560792232111601e-06, "loss": 0.1244, "step": 1772 }, { "epoch": 0.8382978723404255, "grad_norm": 5.381835460662842, "learning_rate": 1.551947386241708e-06, "loss": 0.2294, "step": 1773 }, { "epoch": 0.8387706855791962, "grad_norm": 3.8923234939575195, "learning_rate": 1.543125564260668e-06, "loss": 0.1775, "step": 1774 }, { "epoch": 0.8392434988179669, "grad_norm": 4.11511754989624, "learning_rate": 1.5343267902110282e-06, "loss": 0.1614, "step": 1775 }, { "epoch": 0.8397163120567376, "grad_norm": 4.022883892059326, "learning_rate": 1.5255510880725133e-06, "loss": 0.2149, "step": 1776 }, { "epoch": 0.8401891252955083, "grad_norm": 5.9938836097717285, "learning_rate": 1.5167984817619709e-06, "loss": 0.2138, "step": 1777 }, { "epoch": 0.840661938534279, "grad_norm": 6.684109210968018, "learning_rate": 1.5080689951333017e-06, "loss": 0.2798, "step": 1778 }, { "epoch": 0.8411347517730496, "grad_norm": 5.152201175689697, "learning_rate": 1.4993626519774073e-06, "loss": 0.2239, "step": 1779 }, { "epoch": 0.8416075650118203, "grad_norm": 8.338132858276367, "learning_rate": 1.4906794760221032e-06, "loss": 0.363, "step": 1780 }, { "epoch": 0.8416075650118203, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7149532710280374, "eval_loss": 0.2977047264575958, "eval_precision": 0.8742857142857143, "eval_recall": 0.6047430830039525, "eval_runtime": 48.5608, "eval_samples_per_second": 5.684, "eval_steps_per_second": 0.185, "step": 1780 }, { "epoch": 0.842080378250591, "grad_norm": 5.853981971740723, "learning_rate": 1.482019490932074e-06, "loss": 0.2162, "step": 1781 }, { "epoch": 0.8425531914893617, "grad_norm": 5.779383182525635, "learning_rate": 1.473382720308797e-06, "loss": 0.2139, "step": 1782 }, { "epoch": 0.8430260047281324, "grad_norm": 5.416299343109131, "learning_rate": 1.4647691876904835e-06, "loss": 0.1742, "step": 1783 }, { "epoch": 0.8434988179669031, "grad_norm": 6.843648433685303, "learning_rate": 1.4561789165520136e-06, "loss": 0.3138, "step": 1784 }, { "epoch": 0.8439716312056738, "grad_norm": 6.401846885681152, "learning_rate": 1.4476119303048709e-06, "loss": 0.259, "step": 1785 }, { "epoch": 0.8444444444444444, "grad_norm": 6.052865982055664, "learning_rate": 1.43906825229708e-06, "loss": 0.2637, "step": 1786 }, { "epoch": 0.8449172576832151, "grad_norm": 5.836279392242432, "learning_rate": 1.4305479058131389e-06, "loss": 0.2327, "step": 1787 }, { "epoch": 0.8453900709219858, "grad_norm": 6.56742525100708, "learning_rate": 1.4220509140739692e-06, "loss": 0.2571, "step": 1788 }, { "epoch": 0.8458628841607565, "grad_norm": 5.522575378417969, "learning_rate": 1.4135773002368314e-06, "loss": 0.1913, "step": 1789 }, { "epoch": 0.8463356973995272, "grad_norm": 6.318970203399658, "learning_rate": 1.4051270873952794e-06, "loss": 0.2334, "step": 1790 }, { "epoch": 0.8468085106382979, "grad_norm": 5.222054958343506, "learning_rate": 1.3967002985790878e-06, "loss": 0.2156, "step": 1791 }, { "epoch": 0.8472813238770686, "grad_norm": 4.232369422912598, "learning_rate": 1.3882969567541959e-06, "loss": 0.2233, "step": 1792 }, { "epoch": 0.8477541371158392, "grad_norm": 3.875591993331909, "learning_rate": 1.3799170848226395e-06, "loss": 0.1502, "step": 1793 }, { "epoch": 0.8482269503546099, "grad_norm": 7.354650020599365, "learning_rate": 1.37156070562249e-06, "loss": 0.249, "step": 1794 }, { "epoch": 0.8486997635933806, "grad_norm": 5.512684345245361, "learning_rate": 1.3632278419277933e-06, "loss": 0.2428, "step": 1795 }, { "epoch": 0.8491725768321513, "grad_norm": 8.732033729553223, "learning_rate": 1.3549185164485135e-06, "loss": 0.3614, "step": 1796 }, { "epoch": 0.849645390070922, "grad_norm": 7.074683666229248, "learning_rate": 1.3466327518304555e-06, "loss": 0.3366, "step": 1797 }, { "epoch": 0.8501182033096927, "grad_norm": 4.561595916748047, "learning_rate": 1.3383705706552174e-06, "loss": 0.2038, "step": 1798 }, { "epoch": 0.8505910165484634, "grad_norm": 4.308145523071289, "learning_rate": 1.3301319954401248e-06, "loss": 0.2003, "step": 1799 }, { "epoch": 0.851063829787234, "grad_norm": 4.1011552810668945, "learning_rate": 1.3219170486381671e-06, "loss": 0.1727, "step": 1800 }, { "epoch": 0.851063829787234, "eval_accuracy": 0.8658536585365854, "eval_f1": 0.717948717948718, "eval_loss": 0.29891377687454224, "eval_precision": 0.875, "eval_recall": 0.6086956521739131, "eval_runtime": 49.9353, "eval_samples_per_second": 5.527, "eval_steps_per_second": 0.18, "step": 1800 }, { "epoch": 0.8515366430260047, "grad_norm": 4.582373142242432, "learning_rate": 1.3137257526379366e-06, "loss": 0.1734, "step": 1801 }, { "epoch": 0.8520094562647754, "grad_norm": 5.546594619750977, "learning_rate": 1.3055581297635734e-06, "loss": 0.1714, "step": 1802 }, { "epoch": 0.8524822695035461, "grad_norm": 6.5857696533203125, "learning_rate": 1.2974142022746971e-06, "loss": 0.2197, "step": 1803 }, { "epoch": 0.8529550827423168, "grad_norm": 3.709681987762451, "learning_rate": 1.289293992366346e-06, "loss": 0.1239, "step": 1804 }, { "epoch": 0.8534278959810875, "grad_norm": 3.4839696884155273, "learning_rate": 1.2811975221689289e-06, "loss": 0.1857, "step": 1805 }, { "epoch": 0.8539007092198582, "grad_norm": 4.332188606262207, "learning_rate": 1.2731248137481468e-06, "loss": 0.2506, "step": 1806 }, { "epoch": 0.8543735224586289, "grad_norm": 5.268299579620361, "learning_rate": 1.2650758891049464e-06, "loss": 0.2326, "step": 1807 }, { "epoch": 0.8548463356973995, "grad_norm": 4.915356636047363, "learning_rate": 1.257050770175452e-06, "loss": 0.1439, "step": 1808 }, { "epoch": 0.8553191489361702, "grad_norm": 5.468433856964111, "learning_rate": 1.2490494788309115e-06, "loss": 0.2292, "step": 1809 }, { "epoch": 0.8557919621749409, "grad_norm": 8.169693946838379, "learning_rate": 1.241072036877633e-06, "loss": 0.2483, "step": 1810 }, { "epoch": 0.8562647754137116, "grad_norm": 4.5563063621521, "learning_rate": 1.2331184660569284e-06, "loss": 0.213, "step": 1811 }, { "epoch": 0.8567375886524823, "grad_norm": 5.421559810638428, "learning_rate": 1.2251887880450498e-06, "loss": 0.2602, "step": 1812 }, { "epoch": 0.857210401891253, "grad_norm": 5.384074687957764, "learning_rate": 1.217283024453133e-06, "loss": 0.2221, "step": 1813 }, { "epoch": 0.8576832151300237, "grad_norm": 4.89252233505249, "learning_rate": 1.2094011968271447e-06, "loss": 0.1907, "step": 1814 }, { "epoch": 0.8581560283687943, "grad_norm": 6.57568359375, "learning_rate": 1.2015433266478105e-06, "loss": 0.2879, "step": 1815 }, { "epoch": 0.858628841607565, "grad_norm": 7.750422954559326, "learning_rate": 1.1937094353305679e-06, "loss": 0.2787, "step": 1816 }, { "epoch": 0.8591016548463357, "grad_norm": 5.562004089355469, "learning_rate": 1.1858995442254984e-06, "loss": 0.2327, "step": 1817 }, { "epoch": 0.8595744680851064, "grad_norm": 5.795264720916748, "learning_rate": 1.178113674617285e-06, "loss": 0.2329, "step": 1818 }, { "epoch": 0.8600472813238771, "grad_norm": 4.586887836456299, "learning_rate": 1.1703518477251296e-06, "loss": 0.2449, "step": 1819 }, { "epoch": 0.8605200945626478, "grad_norm": 5.4445648193359375, "learning_rate": 1.1626140847027211e-06, "loss": 0.1995, "step": 1820 }, { "epoch": 0.8605200945626478, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7089201877934272, "eval_loss": 0.30064335465431213, "eval_precision": 0.8728323699421965, "eval_recall": 0.5968379446640316, "eval_runtime": 47.2996, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.19, "step": 1820 }, { "epoch": 0.8609929078014185, "grad_norm": 5.280003070831299, "learning_rate": 1.154900406638161e-06, "loss": 0.2273, "step": 1821 }, { "epoch": 0.8614657210401891, "grad_norm": 5.873126029968262, "learning_rate": 1.147210834553908e-06, "loss": 0.3027, "step": 1822 }, { "epoch": 0.8619385342789598, "grad_norm": 5.633482456207275, "learning_rate": 1.1395453894067322e-06, "loss": 0.2282, "step": 1823 }, { "epoch": 0.8624113475177305, "grad_norm": 7.417043209075928, "learning_rate": 1.1319040920876412e-06, "loss": 0.261, "step": 1824 }, { "epoch": 0.8628841607565012, "grad_norm": 4.741674900054932, "learning_rate": 1.1242869634218355e-06, "loss": 0.2136, "step": 1825 }, { "epoch": 0.8633569739952719, "grad_norm": 6.367619037628174, "learning_rate": 1.1166940241686453e-06, "loss": 0.2331, "step": 1826 }, { "epoch": 0.8638297872340426, "grad_norm": 7.427839756011963, "learning_rate": 1.1091252950214793e-06, "loss": 0.2836, "step": 1827 }, { "epoch": 0.8643026004728133, "grad_norm": 4.886536598205566, "learning_rate": 1.1015807966077641e-06, "loss": 0.2326, "step": 1828 }, { "epoch": 0.864775413711584, "grad_norm": 6.093448162078857, "learning_rate": 1.0940605494888856e-06, "loss": 0.1806, "step": 1829 }, { "epoch": 0.8652482269503546, "grad_norm": 5.902658939361572, "learning_rate": 1.0865645741601372e-06, "loss": 0.2035, "step": 1830 }, { "epoch": 0.8657210401891253, "grad_norm": 6.096370697021484, "learning_rate": 1.0790928910506705e-06, "loss": 0.1924, "step": 1831 }, { "epoch": 0.866193853427896, "grad_norm": 6.3981499671936035, "learning_rate": 1.0716455205234244e-06, "loss": 0.2536, "step": 1832 }, { "epoch": 0.8666666666666667, "grad_norm": 5.565893650054932, "learning_rate": 1.0642224828750803e-06, "loss": 0.2512, "step": 1833 }, { "epoch": 0.8671394799054374, "grad_norm": 6.9906206130981445, "learning_rate": 1.0568237983360041e-06, "loss": 0.2001, "step": 1834 }, { "epoch": 0.8676122931442081, "grad_norm": 5.311846733093262, "learning_rate": 1.0494494870701889e-06, "loss": 0.2169, "step": 1835 }, { "epoch": 0.8680851063829788, "grad_norm": 4.316741466522217, "learning_rate": 1.0420995691752079e-06, "loss": 0.2258, "step": 1836 }, { "epoch": 0.8685579196217494, "grad_norm": 4.4594316482543945, "learning_rate": 1.034774064682148e-06, "loss": 0.1806, "step": 1837 }, { "epoch": 0.8690307328605201, "grad_norm": 4.841372013092041, "learning_rate": 1.027472993555565e-06, "loss": 0.1777, "step": 1838 }, { "epoch": 0.8695035460992908, "grad_norm": 4.721182346343994, "learning_rate": 1.0201963756934164e-06, "loss": 0.1937, "step": 1839 }, { "epoch": 0.8699763593380615, "grad_norm": 5.975325584411621, "learning_rate": 1.012944230927031e-06, "loss": 0.154, "step": 1840 }, { "epoch": 0.8699763593380615, "eval_accuracy": 0.8680709534368071, "eval_f1": 0.7264367816091954, "eval_loss": 0.29660460352897644, "eval_precision": 0.8681318681318682, "eval_recall": 0.6245059288537549, "eval_runtime": 47.2449, "eval_samples_per_second": 5.842, "eval_steps_per_second": 0.19, "step": 1840 }, { "epoch": 0.8704491725768322, "grad_norm": 4.6500020027160645, "learning_rate": 1.0057165790210277e-06, "loss": 0.1928, "step": 1841 }, { "epoch": 0.8709219858156029, "grad_norm": 5.256702423095703, "learning_rate": 9.985134396732798e-07, "loss": 0.2108, "step": 1842 }, { "epoch": 0.8713947990543736, "grad_norm": 4.254281997680664, "learning_rate": 9.913348325148498e-07, "loss": 0.2064, "step": 1843 }, { "epoch": 0.8718676122931442, "grad_norm": 4.128483772277832, "learning_rate": 9.841807771099498e-07, "loss": 0.1908, "step": 1844 }, { "epoch": 0.8723404255319149, "grad_norm": 6.125643730163574, "learning_rate": 9.77051292955873e-07, "loss": 0.2637, "step": 1845 }, { "epoch": 0.8728132387706856, "grad_norm": 5.453957557678223, "learning_rate": 9.699463994829495e-07, "loss": 0.2566, "step": 1846 }, { "epoch": 0.8732860520094563, "grad_norm": 15.336091041564941, "learning_rate": 9.628661160544905e-07, "loss": 0.2678, "step": 1847 }, { "epoch": 0.873758865248227, "grad_norm": 7.426636695861816, "learning_rate": 9.558104619667386e-07, "loss": 0.1946, "step": 1848 }, { "epoch": 0.8742316784869977, "grad_norm": 6.527373313903809, "learning_rate": 9.487794564488106e-07, "loss": 0.2772, "step": 1849 }, { "epoch": 0.8747044917257684, "grad_norm": 5.806751728057861, "learning_rate": 9.417731186626466e-07, "loss": 0.1703, "step": 1850 }, { "epoch": 0.875177304964539, "grad_norm": 5.468467712402344, "learning_rate": 9.347914677029624e-07, "loss": 0.2873, "step": 1851 }, { "epoch": 0.8756501182033097, "grad_norm": 7.120368957519531, "learning_rate": 9.278345225971863e-07, "loss": 0.296, "step": 1852 }, { "epoch": 0.8761229314420804, "grad_norm": 6.212596893310547, "learning_rate": 9.209023023054253e-07, "loss": 0.2348, "step": 1853 }, { "epoch": 0.8765957446808511, "grad_norm": 3.766883373260498, "learning_rate": 9.139948257203934e-07, "loss": 0.1481, "step": 1854 }, { "epoch": 0.8770685579196218, "grad_norm": 3.6534929275512695, "learning_rate": 9.071121116673731e-07, "loss": 0.1831, "step": 1855 }, { "epoch": 0.8775413711583925, "grad_norm": 6.801371097564697, "learning_rate": 9.002541789041608e-07, "loss": 0.257, "step": 1856 }, { "epoch": 0.8780141843971632, "grad_norm": 6.545820713043213, "learning_rate": 8.934210461210136e-07, "loss": 0.2464, "step": 1857 }, { "epoch": 0.8784869976359339, "grad_norm": 7.8918914794921875, "learning_rate": 8.866127319406004e-07, "loss": 0.2951, "step": 1858 }, { "epoch": 0.8789598108747045, "grad_norm": 7.128468036651611, "learning_rate": 8.79829254917951e-07, "loss": 0.3351, "step": 1859 }, { "epoch": 0.8794326241134752, "grad_norm": 7.129080772399902, "learning_rate": 8.73070633540406e-07, "loss": 0.1821, "step": 1860 }, { "epoch": 0.8794326241134752, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7235023041474654, "eval_loss": 0.29677459597587585, "eval_precision": 0.8674033149171271, "eval_recall": 0.6205533596837944, "eval_runtime": 48.3135, "eval_samples_per_second": 5.713, "eval_steps_per_second": 0.186, "step": 1860 }, { "epoch": 0.8799054373522459, "grad_norm": 5.813145637512207, "learning_rate": 8.663368862275634e-07, "loss": 0.2184, "step": 1861 }, { "epoch": 0.8803782505910166, "grad_norm": 4.450648307800293, "learning_rate": 8.596280313312355e-07, "loss": 0.2037, "step": 1862 }, { "epoch": 0.8808510638297873, "grad_norm": 4.639596939086914, "learning_rate": 8.5294408713539e-07, "loss": 0.2164, "step": 1863 }, { "epoch": 0.881323877068558, "grad_norm": 5.317780017852783, "learning_rate": 8.462850718561045e-07, "loss": 0.2591, "step": 1864 }, { "epoch": 0.8817966903073287, "grad_norm": 5.928182125091553, "learning_rate": 8.396510036415173e-07, "loss": 0.2807, "step": 1865 }, { "epoch": 0.8822695035460993, "grad_norm": 8.71645736694336, "learning_rate": 8.330419005717782e-07, "loss": 0.3168, "step": 1866 }, { "epoch": 0.88274231678487, "grad_norm": 5.529267311096191, "learning_rate": 8.264577806589968e-07, "loss": 0.2113, "step": 1867 }, { "epoch": 0.8832151300236407, "grad_norm": 4.838929176330566, "learning_rate": 8.198986618471949e-07, "loss": 0.1428, "step": 1868 }, { "epoch": 0.8836879432624114, "grad_norm": 5.252522945404053, "learning_rate": 8.133645620122566e-07, "loss": 0.2061, "step": 1869 }, { "epoch": 0.8841607565011821, "grad_norm": 5.35953950881958, "learning_rate": 8.068554989618871e-07, "loss": 0.1998, "step": 1870 }, { "epoch": 0.8846335697399527, "grad_norm": 5.610535621643066, "learning_rate": 8.003714904355486e-07, "loss": 0.1787, "step": 1871 }, { "epoch": 0.8851063829787233, "grad_norm": 7.672272205352783, "learning_rate": 7.939125541044268e-07, "loss": 0.147, "step": 1872 }, { "epoch": 0.885579196217494, "grad_norm": 6.3421711921691895, "learning_rate": 7.874787075713742e-07, "loss": 0.2605, "step": 1873 }, { "epoch": 0.8860520094562647, "grad_norm": 6.709553241729736, "learning_rate": 7.810699683708644e-07, "loss": 0.2765, "step": 1874 }, { "epoch": 0.8865248226950354, "grad_norm": 7.121283531188965, "learning_rate": 7.74686353968952e-07, "loss": 0.2537, "step": 1875 }, { "epoch": 0.8869976359338061, "grad_norm": 7.508021831512451, "learning_rate": 7.683278817632056e-07, "loss": 0.2712, "step": 1876 }, { "epoch": 0.8874704491725768, "grad_norm": 6.003512859344482, "learning_rate": 7.619945690826824e-07, "loss": 0.2222, "step": 1877 }, { "epoch": 0.8879432624113475, "grad_norm": 6.198127746582031, "learning_rate": 7.556864331878633e-07, "loss": 0.2216, "step": 1878 }, { "epoch": 0.8884160756501182, "grad_norm": 4.947995185852051, "learning_rate": 7.494034912706227e-07, "loss": 0.1685, "step": 1879 }, { "epoch": 0.8888888888888888, "grad_norm": 7.408123016357422, "learning_rate": 7.43145760454167e-07, "loss": 0.2354, "step": 1880 }, { "epoch": 0.8888888888888888, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.726027397260274, "eval_loss": 0.2952026128768921, "eval_precision": 0.8594594594594595, "eval_recall": 0.6284584980237155, "eval_runtime": 47.4827, "eval_samples_per_second": 5.813, "eval_steps_per_second": 0.19, "step": 1880 }, { "epoch": 0.8893617021276595, "grad_norm": 5.347765922546387, "learning_rate": 7.369132577929938e-07, "loss": 0.2025, "step": 1881 }, { "epoch": 0.8898345153664302, "grad_norm": 5.331334590911865, "learning_rate": 7.307060002728462e-07, "loss": 0.2239, "step": 1882 }, { "epoch": 0.8903073286052009, "grad_norm": 4.435246467590332, "learning_rate": 7.245240048106628e-07, "loss": 0.1737, "step": 1883 }, { "epoch": 0.8907801418439716, "grad_norm": 7.154726028442383, "learning_rate": 7.183672882545401e-07, "loss": 0.2582, "step": 1884 }, { "epoch": 0.8912529550827423, "grad_norm": 4.464818000793457, "learning_rate": 7.122358673836782e-07, "loss": 0.1574, "step": 1885 }, { "epoch": 0.891725768321513, "grad_norm": 6.102884769439697, "learning_rate": 7.061297589083327e-07, "loss": 0.2082, "step": 1886 }, { "epoch": 0.8921985815602836, "grad_norm": 5.337555408477783, "learning_rate": 7.000489794697774e-07, "loss": 0.237, "step": 1887 }, { "epoch": 0.8926713947990543, "grad_norm": 6.383353233337402, "learning_rate": 6.939935456402613e-07, "loss": 0.2242, "step": 1888 }, { "epoch": 0.893144208037825, "grad_norm": 5.135204792022705, "learning_rate": 6.879634739229502e-07, "loss": 0.2586, "step": 1889 }, { "epoch": 0.8936170212765957, "grad_norm": 13.136929512023926, "learning_rate": 6.819587807518924e-07, "loss": 0.3131, "step": 1890 }, { "epoch": 0.8940898345153664, "grad_norm": 5.313321590423584, "learning_rate": 6.759794824919686e-07, "loss": 0.2519, "step": 1891 }, { "epoch": 0.8945626477541371, "grad_norm": 5.850648403167725, "learning_rate": 6.700255954388535e-07, "loss": 0.2373, "step": 1892 }, { "epoch": 0.8950354609929078, "grad_norm": 5.615677356719971, "learning_rate": 6.640971358189651e-07, "loss": 0.1992, "step": 1893 }, { "epoch": 0.8955082742316784, "grad_norm": 6.700225830078125, "learning_rate": 6.581941197894226e-07, "loss": 0.2574, "step": 1894 }, { "epoch": 0.8959810874704491, "grad_norm": 9.15202808380127, "learning_rate": 6.523165634380047e-07, "loss": 0.3287, "step": 1895 }, { "epoch": 0.8964539007092198, "grad_norm": 4.325416564941406, "learning_rate": 6.464644827830945e-07, "loss": 0.1514, "step": 1896 }, { "epoch": 0.8969267139479905, "grad_norm": 5.760486602783203, "learning_rate": 6.406378937736602e-07, "loss": 0.2557, "step": 1897 }, { "epoch": 0.8973995271867612, "grad_norm": 5.745640754699707, "learning_rate": 6.348368122891857e-07, "loss": 0.1752, "step": 1898 }, { "epoch": 0.8978723404255319, "grad_norm": 6.136902809143066, "learning_rate": 6.29061254139639e-07, "loss": 0.2628, "step": 1899 }, { "epoch": 0.8983451536643026, "grad_norm": 10.711871147155762, "learning_rate": 6.233112350654302e-07, "loss": 0.3563, "step": 1900 }, { "epoch": 0.8983451536643026, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7272727272727273, "eval_loss": 0.2932513654232025, "eval_precision": 0.8556149732620321, "eval_recall": 0.6324110671936759, "eval_runtime": 47.8382, "eval_samples_per_second": 5.769, "eval_steps_per_second": 0.188, "step": 1900 }, { "epoch": 0.8988179669030733, "grad_norm": 6.118460178375244, "learning_rate": 6.175867707373695e-07, "loss": 0.2678, "step": 1901 }, { "epoch": 0.8992907801418439, "grad_norm": 5.572527885437012, "learning_rate": 6.118878767566139e-07, "loss": 0.2428, "step": 1902 }, { "epoch": 0.8997635933806146, "grad_norm": 6.919821262359619, "learning_rate": 6.062145686546383e-07, "loss": 0.1785, "step": 1903 }, { "epoch": 0.9002364066193853, "grad_norm": 5.680126667022705, "learning_rate": 6.00566861893186e-07, "loss": 0.2201, "step": 1904 }, { "epoch": 0.900709219858156, "grad_norm": 5.649215221405029, "learning_rate": 5.949447718642254e-07, "loss": 0.169, "step": 1905 }, { "epoch": 0.9011820330969267, "grad_norm": 6.076656341552734, "learning_rate": 5.893483138899125e-07, "loss": 0.219, "step": 1906 }, { "epoch": 0.9016548463356974, "grad_norm": 5.83716344833374, "learning_rate": 5.837775032225479e-07, "loss": 0.2754, "step": 1907 }, { "epoch": 0.902127659574468, "grad_norm": 4.6485819816589355, "learning_rate": 5.782323550445313e-07, "loss": 0.2558, "step": 1908 }, { "epoch": 0.9026004728132387, "grad_norm": 5.645073890686035, "learning_rate": 5.727128844683227e-07, "loss": 0.214, "step": 1909 }, { "epoch": 0.9030732860520094, "grad_norm": 7.4083476066589355, "learning_rate": 5.672191065364097e-07, "loss": 0.2417, "step": 1910 }, { "epoch": 0.9035460992907801, "grad_norm": 6.812260150909424, "learning_rate": 5.617510362212486e-07, "loss": 0.3103, "step": 1911 }, { "epoch": 0.9040189125295508, "grad_norm": 5.349275588989258, "learning_rate": 5.563086884252389e-07, "loss": 0.206, "step": 1912 }, { "epoch": 0.9044917257683215, "grad_norm": 5.61432409286499, "learning_rate": 5.508920779806748e-07, "loss": 0.2645, "step": 1913 }, { "epoch": 0.9049645390070922, "grad_norm": 8.072186470031738, "learning_rate": 5.455012196497089e-07, "loss": 0.2231, "step": 1914 }, { "epoch": 0.9054373522458629, "grad_norm": 6.193761348724365, "learning_rate": 5.4013612812431e-07, "loss": 0.2029, "step": 1915 }, { "epoch": 0.9059101654846335, "grad_norm": 3.4695332050323486, "learning_rate": 5.34796818026222e-07, "loss": 0.194, "step": 1916 }, { "epoch": 0.9063829787234042, "grad_norm": 4.863160133361816, "learning_rate": 5.294833039069269e-07, "loss": 0.1776, "step": 1917 }, { "epoch": 0.9068557919621749, "grad_norm": 5.608933448791504, "learning_rate": 5.241956002476045e-07, "loss": 0.2093, "step": 1918 }, { "epoch": 0.9073286052009456, "grad_norm": 4.8589959144592285, "learning_rate": 5.189337214590895e-07, "loss": 0.1433, "step": 1919 }, { "epoch": 0.9078014184397163, "grad_norm": 5.196472644805908, "learning_rate": 5.136976818818373e-07, "loss": 0.2716, "step": 1920 }, { "epoch": 0.9078014184397163, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7188940092165899, "eval_loss": 0.296786367893219, "eval_precision": 0.861878453038674, "eval_recall": 0.616600790513834, "eval_runtime": 48.2695, "eval_samples_per_second": 5.718, "eval_steps_per_second": 0.186, "step": 1920 }, { "epoch": 0.908274231678487, "grad_norm": 5.688756942749023, "learning_rate": 5.08487495785881e-07, "loss": 0.1862, "step": 1921 }, { "epoch": 0.9087470449172577, "grad_norm": 4.335201263427734, "learning_rate": 5.03303177370793e-07, "loss": 0.2051, "step": 1922 }, { "epoch": 0.9092198581560283, "grad_norm": 5.4330735206604, "learning_rate": 4.981447407656504e-07, "loss": 0.2108, "step": 1923 }, { "epoch": 0.909692671394799, "grad_norm": 7.466004848480225, "learning_rate": 4.930122000289905e-07, "loss": 0.2334, "step": 1924 }, { "epoch": 0.9101654846335697, "grad_norm": 8.651698112487793, "learning_rate": 4.879055691487767e-07, "loss": 0.2628, "step": 1925 }, { "epoch": 0.9106382978723404, "grad_norm": 6.537608623504639, "learning_rate": 4.828248620423559e-07, "loss": 0.2477, "step": 1926 }, { "epoch": 0.9111111111111111, "grad_norm": 5.294820308685303, "learning_rate": 4.77770092556431e-07, "loss": 0.2558, "step": 1927 }, { "epoch": 0.9115839243498818, "grad_norm": 6.457219123840332, "learning_rate": 4.72741274467009e-07, "loss": 0.2544, "step": 1928 }, { "epoch": 0.9120567375886525, "grad_norm": 6.060578346252441, "learning_rate": 4.6773842147937234e-07, "loss": 0.2504, "step": 1929 }, { "epoch": 0.9125295508274232, "grad_norm": 6.253387928009033, "learning_rate": 4.627615472280389e-07, "loss": 0.2758, "step": 1930 }, { "epoch": 0.9130023640661938, "grad_norm": 5.318558692932129, "learning_rate": 4.5781066527673003e-07, "loss": 0.1307, "step": 1931 }, { "epoch": 0.9134751773049645, "grad_norm": 6.5254316329956055, "learning_rate": 4.528857891183214e-07, "loss": 0.2367, "step": 1932 }, { "epoch": 0.9139479905437352, "grad_norm": 4.4486165046691895, "learning_rate": 4.479869321748187e-07, "loss": 0.1974, "step": 1933 }, { "epoch": 0.9144208037825059, "grad_norm": 5.705449104309082, "learning_rate": 4.431141077973156e-07, "loss": 0.1546, "step": 1934 }, { "epoch": 0.9148936170212766, "grad_norm": 6.80421781539917, "learning_rate": 4.382673292659545e-07, "loss": 0.2338, "step": 1935 }, { "epoch": 0.9153664302600473, "grad_norm": 6.027945518493652, "learning_rate": 4.334466097899015e-07, "loss": 0.2387, "step": 1936 }, { "epoch": 0.915839243498818, "grad_norm": 7.638448715209961, "learning_rate": 4.28651962507296e-07, "loss": 0.3043, "step": 1937 }, { "epoch": 0.9163120567375886, "grad_norm": 5.784573078155518, "learning_rate": 4.2388340048522325e-07, "loss": 0.1626, "step": 1938 }, { "epoch": 0.9167848699763593, "grad_norm": 7.274070739746094, "learning_rate": 4.191409367196753e-07, "loss": 0.3126, "step": 1939 }, { "epoch": 0.91725768321513, "grad_norm": 4.2528533935546875, "learning_rate": 4.1442458413552324e-07, "loss": 0.1428, "step": 1940 }, { "epoch": 0.91725768321513, "eval_accuracy": 0.8636363636363636, "eval_f1": 0.7146171693735499, "eval_loss": 0.2969609200954437, "eval_precision": 0.8651685393258427, "eval_recall": 0.6086956521739131, "eval_runtime": 49.0031, "eval_samples_per_second": 5.632, "eval_steps_per_second": 0.184, "step": 1940 }, { "epoch": 0.9177304964539007, "grad_norm": 6.376473426818848, "learning_rate": 4.097343555864719e-07, "loss": 0.3121, "step": 1941 }, { "epoch": 0.9182033096926714, "grad_norm": 4.471124172210693, "learning_rate": 4.0507026385502747e-07, "loss": 0.2247, "step": 1942 }, { "epoch": 0.9186761229314421, "grad_norm": 4.96635103225708, "learning_rate": 4.0043232165246413e-07, "loss": 0.1916, "step": 1943 }, { "epoch": 0.9191489361702128, "grad_norm": 4.768991947174072, "learning_rate": 3.958205416187966e-07, "loss": 0.1832, "step": 1944 }, { "epoch": 0.9196217494089834, "grad_norm": 3.4908788204193115, "learning_rate": 3.9123493632272967e-07, "loss": 0.1689, "step": 1945 }, { "epoch": 0.9200945626477541, "grad_norm": 7.56951379776001, "learning_rate": 3.8667551826163774e-07, "loss": 0.2176, "step": 1946 }, { "epoch": 0.9205673758865248, "grad_norm": 6.852828502655029, "learning_rate": 3.821422998615254e-07, "loss": 0.2735, "step": 1947 }, { "epoch": 0.9210401891252955, "grad_norm": 5.238857269287109, "learning_rate": 3.776352934769911e-07, "loss": 0.2495, "step": 1948 }, { "epoch": 0.9215130023640662, "grad_norm": 6.270791530609131, "learning_rate": 3.731545113912005e-07, "loss": 0.2455, "step": 1949 }, { "epoch": 0.9219858156028369, "grad_norm": 6.1830034255981445, "learning_rate": 3.6869996581584746e-07, "loss": 0.252, "step": 1950 }, { "epoch": 0.9224586288416076, "grad_norm": 6.186679840087891, "learning_rate": 3.6427166889112184e-07, "loss": 0.2653, "step": 1951 }, { "epoch": 0.9229314420803783, "grad_norm": 4.7130126953125, "learning_rate": 3.5986963268567433e-07, "loss": 0.1775, "step": 1952 }, { "epoch": 0.9234042553191489, "grad_norm": 4.696549892425537, "learning_rate": 3.5549386919659033e-07, "loss": 0.2533, "step": 1953 }, { "epoch": 0.9238770685579196, "grad_norm": 4.767563819885254, "learning_rate": 3.5114439034935053e-07, "loss": 0.2097, "step": 1954 }, { "epoch": 0.9243498817966903, "grad_norm": 3.9315848350524902, "learning_rate": 3.468212079978017e-07, "loss": 0.1625, "step": 1955 }, { "epoch": 0.924822695035461, "grad_norm": 3.269307851791382, "learning_rate": 3.4252433392412244e-07, "loss": 0.1028, "step": 1956 }, { "epoch": 0.9252955082742317, "grad_norm": 6.641714096069336, "learning_rate": 3.3825377983879195e-07, "loss": 0.2642, "step": 1957 }, { "epoch": 0.9257683215130024, "grad_norm": 6.65203332901001, "learning_rate": 3.340095573805613e-07, "loss": 0.2346, "step": 1958 }, { "epoch": 0.926241134751773, "grad_norm": 6.2382025718688965, "learning_rate": 3.2979167811641567e-07, "loss": 0.2514, "step": 1959 }, { "epoch": 0.9267139479905437, "grad_norm": 4.330326557159424, "learning_rate": 3.256001535415465e-07, "loss": 0.2108, "step": 1960 }, { "epoch": 0.9267139479905437, "eval_accuracy": 0.8658536585365854, "eval_f1": 0.7205542725173211, "eval_loss": 0.29785633087158203, "eval_precision": 0.8666666666666667, "eval_recall": 0.616600790513834, "eval_runtime": 47.846, "eval_samples_per_second": 5.769, "eval_steps_per_second": 0.188, "step": 1960 }, { "epoch": 0.9271867612293144, "grad_norm": 4.008530139923096, "learning_rate": 3.214349950793183e-07, "loss": 0.1599, "step": 1961 }, { "epoch": 0.9276595744680851, "grad_norm": 6.942195415496826, "learning_rate": 3.172962140812419e-07, "loss": 0.3592, "step": 1962 }, { "epoch": 0.9281323877068558, "grad_norm": 4.6299567222595215, "learning_rate": 3.1318382182693894e-07, "loss": 0.2181, "step": 1963 }, { "epoch": 0.9286052009456265, "grad_norm": 5.631269454956055, "learning_rate": 3.0909782952410984e-07, "loss": 0.269, "step": 1964 }, { "epoch": 0.9290780141843972, "grad_norm": 4.7120232582092285, "learning_rate": 3.05038248308509e-07, "loss": 0.1236, "step": 1965 }, { "epoch": 0.9295508274231679, "grad_norm": 7.05232048034668, "learning_rate": 3.010050892439109e-07, "loss": 0.2494, "step": 1966 }, { "epoch": 0.9300236406619385, "grad_norm": 4.27794885635376, "learning_rate": 2.9699836332208186e-07, "loss": 0.1902, "step": 1967 }, { "epoch": 0.9304964539007092, "grad_norm": 5.519193172454834, "learning_rate": 2.930180814627448e-07, "loss": 0.2123, "step": 1968 }, { "epoch": 0.9309692671394799, "grad_norm": 5.249775409698486, "learning_rate": 2.890642545135569e-07, "loss": 0.2105, "step": 1969 }, { "epoch": 0.9314420803782506, "grad_norm": 6.687892436981201, "learning_rate": 2.851368932500742e-07, "loss": 0.2725, "step": 1970 }, { "epoch": 0.9319148936170213, "grad_norm": 5.885591506958008, "learning_rate": 2.8123600837572594e-07, "loss": 0.261, "step": 1971 }, { "epoch": 0.932387706855792, "grad_norm": 4.598552227020264, "learning_rate": 2.773616105217836e-07, "loss": 0.1995, "step": 1972 }, { "epoch": 0.9328605200945627, "grad_norm": 6.907764434814453, "learning_rate": 2.7351371024733174e-07, "loss": 0.2393, "step": 1973 }, { "epoch": 0.9333333333333333, "grad_norm": 5.189178466796875, "learning_rate": 2.6969231803923856e-07, "loss": 0.1963, "step": 1974 }, { "epoch": 0.933806146572104, "grad_norm": 5.675337791442871, "learning_rate": 2.6589744431213313e-07, "loss": 0.2482, "step": 1975 }, { "epoch": 0.9342789598108747, "grad_norm": 4.176632881164551, "learning_rate": 2.621290994083692e-07, "loss": 0.1704, "step": 1976 }, { "epoch": 0.9347517730496454, "grad_norm": 4.455401420593262, "learning_rate": 2.5838729359799917e-07, "loss": 0.2635, "step": 1977 }, { "epoch": 0.9352245862884161, "grad_norm": 5.684086799621582, "learning_rate": 2.546720370787492e-07, "loss": 0.2496, "step": 1978 }, { "epoch": 0.9356973995271868, "grad_norm": 7.903246879577637, "learning_rate": 2.5098333997598755e-07, "loss": 0.3008, "step": 1979 }, { "epoch": 0.9361702127659575, "grad_norm": 4.9532270431518555, "learning_rate": 2.4732121234270156e-07, "loss": 0.1501, "step": 1980 }, { "epoch": 0.9361702127659575, "eval_accuracy": 0.8669623059866962, "eval_f1": 0.7222222222222222, "eval_loss": 0.2986098527908325, "eval_precision": 0.8715083798882681, "eval_recall": 0.616600790513834, "eval_runtime": 49.3058, "eval_samples_per_second": 5.598, "eval_steps_per_second": 0.183, "step": 1980 }, { "epoch": 0.9366430260047282, "grad_norm": 4.474625110626221, "learning_rate": 2.4368566415946536e-07, "loss": 0.1952, "step": 1981 }, { "epoch": 0.9371158392434988, "grad_norm": 7.129388809204102, "learning_rate": 2.400767053344144e-07, "loss": 0.2342, "step": 1982 }, { "epoch": 0.9375886524822695, "grad_norm": 7.6979780197143555, "learning_rate": 2.3649434570321984e-07, "loss": 0.2414, "step": 1983 }, { "epoch": 0.9380614657210402, "grad_norm": 5.29350471496582, "learning_rate": 2.3293859502906192e-07, "loss": 0.241, "step": 1984 }, { "epoch": 0.9385342789598109, "grad_norm": 5.01874303817749, "learning_rate": 2.2940946300260113e-07, "loss": 0.2131, "step": 1985 }, { "epoch": 0.9390070921985816, "grad_norm": 5.676163673400879, "learning_rate": 2.2590695924195048e-07, "loss": 0.3109, "step": 1986 }, { "epoch": 0.9394799054373523, "grad_norm": 4.1814045906066895, "learning_rate": 2.2243109329265545e-07, "loss": 0.1398, "step": 1987 }, { "epoch": 0.939952718676123, "grad_norm": 5.860604763031006, "learning_rate": 2.1898187462766395e-07, "loss": 0.2024, "step": 1988 }, { "epoch": 0.9404255319148936, "grad_norm": 5.536343574523926, "learning_rate": 2.1555931264729657e-07, "loss": 0.2877, "step": 1989 }, { "epoch": 0.9408983451536643, "grad_norm": 4.574560642242432, "learning_rate": 2.121634166792308e-07, "loss": 0.226, "step": 1990 }, { "epoch": 0.941371158392435, "grad_norm": 6.119741439819336, "learning_rate": 2.087941959784634e-07, "loss": 0.213, "step": 1991 }, { "epoch": 0.9418439716312057, "grad_norm": 5.73854398727417, "learning_rate": 2.054516597272993e-07, "loss": 0.2295, "step": 1992 }, { "epoch": 0.9423167848699764, "grad_norm": 6.395056247711182, "learning_rate": 2.021358170353138e-07, "loss": 0.2884, "step": 1993 }, { "epoch": 0.9427895981087471, "grad_norm": 6.370244026184082, "learning_rate": 1.988466769393349e-07, "loss": 0.2622, "step": 1994 }, { "epoch": 0.9432624113475178, "grad_norm": 5.031834125518799, "learning_rate": 1.9558424840341428e-07, "loss": 0.2347, "step": 1995 }, { "epoch": 0.9437352245862884, "grad_norm": 4.863191604614258, "learning_rate": 1.9234854031880856e-07, "loss": 0.2221, "step": 1996 }, { "epoch": 0.9442080378250591, "grad_norm": 7.025779724121094, "learning_rate": 1.891395615039504e-07, "loss": 0.2246, "step": 1997 }, { "epoch": 0.9446808510638298, "grad_norm": 6.635202407836914, "learning_rate": 1.859573207044274e-07, "loss": 0.2719, "step": 1998 }, { "epoch": 0.9451536643026005, "grad_norm": 7.9684014320373535, "learning_rate": 1.8280182659295321e-07, "loss": 0.3291, "step": 1999 }, { "epoch": 0.9456264775413712, "grad_norm": 5.251444339752197, "learning_rate": 1.7967308776934755e-07, "loss": 0.2162, "step": 2000 }, { "epoch": 0.9456264775413712, "eval_accuracy": 0.8625277161862528, "eval_f1": 0.7116279069767442, "eval_loss": 0.29841360449790955, "eval_precision": 0.864406779661017, "eval_recall": 0.6047430830039525, "eval_runtime": 48.9233, "eval_samples_per_second": 5.641, "eval_steps_per_second": 0.184, "step": 2000 }, { "epoch": 0.9460992907801419, "grad_norm": 5.928063869476318, "learning_rate": 1.7657111276051852e-07, "loss": 0.2758, "step": 2001 }, { "epoch": 0.9465721040189126, "grad_norm": 5.081968784332275, "learning_rate": 1.734959100204281e-07, "loss": 0.1877, "step": 2002 }, { "epoch": 0.9470449172576832, "grad_norm": 5.424426078796387, "learning_rate": 1.704474879300766e-07, "loss": 0.216, "step": 2003 }, { "epoch": 0.9475177304964539, "grad_norm": 5.300611972808838, "learning_rate": 1.6742585479747388e-07, "loss": 0.2141, "step": 2004 }, { "epoch": 0.9479905437352246, "grad_norm": 7.5446858406066895, "learning_rate": 1.6443101885762812e-07, "loss": 0.2932, "step": 2005 }, { "epoch": 0.9484633569739953, "grad_norm": 6.073637008666992, "learning_rate": 1.614629882725094e-07, "loss": 0.2036, "step": 2006 }, { "epoch": 0.948936170212766, "grad_norm": 7.519749164581299, "learning_rate": 1.5852177113103606e-07, "loss": 0.2765, "step": 2007 }, { "epoch": 0.9494089834515367, "grad_norm": 6.116303443908691, "learning_rate": 1.5560737544905058e-07, "loss": 0.2913, "step": 2008 }, { "epoch": 0.9498817966903074, "grad_norm": 5.81624174118042, "learning_rate": 1.5271980916929497e-07, "loss": 0.2321, "step": 2009 }, { "epoch": 0.950354609929078, "grad_norm": 5.760371208190918, "learning_rate": 1.498590801613975e-07, "loss": 0.2134, "step": 2010 }, { "epoch": 0.9508274231678487, "grad_norm": 5.03253698348999, "learning_rate": 1.4702519622184053e-07, "loss": 0.2093, "step": 2011 }, { "epoch": 0.9513002364066194, "grad_norm": 4.581620693206787, "learning_rate": 1.4421816507394605e-07, "loss": 0.2063, "step": 2012 }, { "epoch": 0.9517730496453901, "grad_norm": 5.890350818634033, "learning_rate": 1.4143799436785233e-07, "loss": 0.2267, "step": 2013 }, { "epoch": 0.9522458628841608, "grad_norm": 6.05654764175415, "learning_rate": 1.3868469168049403e-07, "loss": 0.2326, "step": 2014 }, { "epoch": 0.9527186761229315, "grad_norm": 3.6070337295532227, "learning_rate": 1.3595826451558214e-07, "loss": 0.1469, "step": 2015 }, { "epoch": 0.9531914893617022, "grad_norm": 7.624080181121826, "learning_rate": 1.3325872030357955e-07, "loss": 0.2893, "step": 2016 }, { "epoch": 0.9536643026004729, "grad_norm": 6.688779354095459, "learning_rate": 1.3058606640168558e-07, "loss": 0.2668, "step": 2017 }, { "epoch": 0.9541371158392435, "grad_norm": 6.714046001434326, "learning_rate": 1.279403100938148e-07, "loss": 0.2095, "step": 2018 }, { "epoch": 0.9546099290780142, "grad_norm": 3.696683406829834, "learning_rate": 1.25321458590576e-07, "loss": 0.1431, "step": 2019 }, { "epoch": 0.9550827423167849, "grad_norm": 6.133592128753662, "learning_rate": 1.2272951902925211e-07, "loss": 0.3241, "step": 2020 }, { "epoch": 0.9550827423167849, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7162790697674418, "eval_loss": 0.2992999255657196, "eval_precision": 0.8700564971751412, "eval_recall": 0.6086956521739131, "eval_runtime": 48.4915, "eval_samples_per_second": 5.692, "eval_steps_per_second": 0.186, "step": 2020 }, { "epoch": 0.9555555555555556, "grad_norm": 7.198812007904053, "learning_rate": 1.201644984737804e-07, "loss": 0.2988, "step": 2021 }, { "epoch": 0.9560283687943263, "grad_norm": 4.9037322998046875, "learning_rate": 1.1762640391473901e-07, "loss": 0.2401, "step": 2022 }, { "epoch": 0.956501182033097, "grad_norm": 4.425469398498535, "learning_rate": 1.1511524226931914e-07, "loss": 0.1406, "step": 2023 }, { "epoch": 0.9569739952718677, "grad_norm": 5.938382625579834, "learning_rate": 1.126310203813108e-07, "loss": 0.2148, "step": 2024 }, { "epoch": 0.9574468085106383, "grad_norm": 7.644670486450195, "learning_rate": 1.1017374502108713e-07, "loss": 0.2778, "step": 2025 }, { "epoch": 0.957919621749409, "grad_norm": 5.539424896240234, "learning_rate": 1.0774342288557892e-07, "loss": 0.2106, "step": 2026 }, { "epoch": 0.9583924349881797, "grad_norm": 6.603002548217773, "learning_rate": 1.053400605982613e-07, "loss": 0.2815, "step": 2027 }, { "epoch": 0.9588652482269504, "grad_norm": 4.729203701019287, "learning_rate": 1.0296366470913477e-07, "loss": 0.2226, "step": 2028 }, { "epoch": 0.9593380614657211, "grad_norm": 7.116330623626709, "learning_rate": 1.0061424169470646e-07, "loss": 0.299, "step": 2029 }, { "epoch": 0.9598108747044918, "grad_norm": 6.153399467468262, "learning_rate": 9.829179795797339e-08, "loss": 0.2681, "step": 2030 }, { "epoch": 0.9602836879432625, "grad_norm": 4.379301071166992, "learning_rate": 9.599633982840362e-08, "loss": 0.1883, "step": 2031 }, { "epoch": 0.9607565011820332, "grad_norm": 5.625801086425781, "learning_rate": 9.372787356192181e-08, "loss": 0.1923, "step": 2032 }, { "epoch": 0.9612293144208038, "grad_norm": 4.8772077560424805, "learning_rate": 9.148640534089037e-08, "loss": 0.1565, "step": 2033 }, { "epoch": 0.9617021276595744, "grad_norm": 6.87009334564209, "learning_rate": 8.927194127408945e-08, "loss": 0.2341, "step": 2034 }, { "epoch": 0.9621749408983451, "grad_norm": 4.184564113616943, "learning_rate": 8.708448739670805e-08, "loss": 0.1848, "step": 2035 }, { "epoch": 0.9626477541371158, "grad_norm": 4.61867094039917, "learning_rate": 8.492404967031853e-08, "loss": 0.175, "step": 2036 }, { "epoch": 0.9631205673758865, "grad_norm": 3.9743919372558594, "learning_rate": 8.27906339828688e-08, "loss": 0.1485, "step": 2037 }, { "epoch": 0.9635933806146572, "grad_norm": 6.921072959899902, "learning_rate": 8.0684246148659e-08, "loss": 0.2734, "step": 2038 }, { "epoch": 0.9640661938534278, "grad_norm": 4.7037129402160645, "learning_rate": 7.860489190833043e-08, "loss": 0.1407, "step": 2039 }, { "epoch": 0.9645390070921985, "grad_norm": 5.145064353942871, "learning_rate": 7.655257692884998e-08, "loss": 0.2289, "step": 2040 }, { "epoch": 0.9645390070921985, "eval_accuracy": 0.8658536585365854, "eval_f1": 0.7192575406032483, "eval_loss": 0.29763469099998474, "eval_precision": 0.8707865168539326, "eval_recall": 0.6126482213438735, "eval_runtime": 48.2853, "eval_samples_per_second": 5.716, "eval_steps_per_second": 0.186, "step": 2040 }, { "epoch": 0.9650118203309692, "grad_norm": 5.289119243621826, "learning_rate": 7.452730680349019e-08, "loss": 0.2251, "step": 2041 }, { "epoch": 0.9654846335697399, "grad_norm": 7.4958624839782715, "learning_rate": 7.252908705181805e-08, "loss": 0.2453, "step": 2042 }, { "epoch": 0.9659574468085106, "grad_norm": 5.394641876220703, "learning_rate": 7.055792311967958e-08, "loss": 0.2879, "step": 2043 }, { "epoch": 0.9664302600472813, "grad_norm": 4.002281665802002, "learning_rate": 6.861382037918418e-08, "loss": 0.1805, "step": 2044 }, { "epoch": 0.966903073286052, "grad_norm": 5.974024295806885, "learning_rate": 6.669678412868919e-08, "loss": 0.2024, "step": 2045 }, { "epoch": 0.9673758865248226, "grad_norm": 5.801767349243164, "learning_rate": 6.480681959278645e-08, "loss": 0.2164, "step": 2046 }, { "epoch": 0.9678486997635933, "grad_norm": 4.779239177703857, "learning_rate": 6.29439319222891e-08, "loss": 0.1936, "step": 2047 }, { "epoch": 0.968321513002364, "grad_norm": 4.674015522003174, "learning_rate": 6.11081261942148e-08, "loss": 0.2035, "step": 2048 }, { "epoch": 0.9687943262411347, "grad_norm": 6.905233860015869, "learning_rate": 5.929940741177476e-08, "loss": 0.2818, "step": 2049 }, { "epoch": 0.9692671394799054, "grad_norm": 9.568391799926758, "learning_rate": 5.751778050435808e-08, "loss": 0.32, "step": 2050 }, { "epoch": 0.9697399527186761, "grad_norm": 5.665557384490967, "learning_rate": 5.5763250327518505e-08, "loss": 0.2695, "step": 2051 }, { "epoch": 0.9702127659574468, "grad_norm": 4.919648170471191, "learning_rate": 5.4035821662963285e-08, "loss": 0.2343, "step": 2052 }, { "epoch": 0.9706855791962175, "grad_norm": 3.9685451984405518, "learning_rate": 5.233549921853876e-08, "loss": 0.18, "step": 2053 }, { "epoch": 0.9711583924349881, "grad_norm": 5.1178131103515625, "learning_rate": 5.066228762821479e-08, "loss": 0.1903, "step": 2054 }, { "epoch": 0.9716312056737588, "grad_norm": 6.247317314147949, "learning_rate": 4.901619145207703e-08, "loss": 0.1892, "step": 2055 }, { "epoch": 0.9721040189125295, "grad_norm": 3.8373396396636963, "learning_rate": 4.7397215176311354e-08, "loss": 0.1359, "step": 2056 }, { "epoch": 0.9725768321513002, "grad_norm": 6.623259544372559, "learning_rate": 4.580536321319273e-08, "loss": 0.23, "step": 2057 }, { "epoch": 0.9730496453900709, "grad_norm": 5.989914894104004, "learning_rate": 4.424063990107308e-08, "loss": 0.2538, "step": 2058 }, { "epoch": 0.9735224586288416, "grad_norm": 4.51497745513916, "learning_rate": 4.270304950436788e-08, "loss": 0.1994, "step": 2059 }, { "epoch": 0.9739952718676123, "grad_norm": 4.718496799468994, "learning_rate": 4.119259621354843e-08, "loss": 0.1593, "step": 2060 }, { "epoch": 0.9739952718676123, "eval_accuracy": 0.8636363636363636, "eval_f1": 0.7132867132867133, "eval_loss": 0.29835787415504456, "eval_precision": 0.8693181818181818, "eval_recall": 0.6047430830039525, "eval_runtime": 48.4979, "eval_samples_per_second": 5.691, "eval_steps_per_second": 0.186, "step": 2060 }, { "epoch": 0.9744680851063829, "grad_norm": 4.2522358894348145, "learning_rate": 3.9709284145125205e-08, "loss": 0.2072, "step": 2061 }, { "epoch": 0.9749408983451536, "grad_norm": 6.090972900390625, "learning_rate": 3.825311734164116e-08, "loss": 0.227, "step": 2062 }, { "epoch": 0.9754137115839243, "grad_norm": 5.209742546081543, "learning_rate": 3.682409977165957e-08, "loss": 0.214, "step": 2063 }, { "epoch": 0.975886524822695, "grad_norm": 5.365957260131836, "learning_rate": 3.5422235329751756e-08, "loss": 0.1831, "step": 2064 }, { "epoch": 0.9763593380614657, "grad_norm": 9.389203071594238, "learning_rate": 3.4047527836483793e-08, "loss": 0.2723, "step": 2065 }, { "epoch": 0.9768321513002364, "grad_norm": 7.358561038970947, "learning_rate": 3.269998103841765e-08, "loss": 0.2694, "step": 2066 }, { "epoch": 0.9773049645390071, "grad_norm": 5.198401927947998, "learning_rate": 3.137959860808448e-08, "loss": 0.29, "step": 2067 }, { "epoch": 0.9777777777777777, "grad_norm": 5.073206901550293, "learning_rate": 3.008638414398801e-08, "loss": 0.2165, "step": 2068 }, { "epoch": 0.9782505910165484, "grad_norm": 5.652972221374512, "learning_rate": 2.882034117058896e-08, "loss": 0.2447, "step": 2069 }, { "epoch": 0.9787234042553191, "grad_norm": 5.199291229248047, "learning_rate": 2.7581473138296177e-08, "loss": 0.2055, "step": 2070 }, { "epoch": 0.9791962174940898, "grad_norm": 4.334774017333984, "learning_rate": 2.636978342345553e-08, "loss": 0.1535, "step": 2071 }, { "epoch": 0.9796690307328605, "grad_norm": 5.554661750793457, "learning_rate": 2.518527532834436e-08, "loss": 0.239, "step": 2072 }, { "epoch": 0.9801418439716312, "grad_norm": 5.669870853424072, "learning_rate": 2.402795208116149e-08, "loss": 0.2128, "step": 2073 }, { "epoch": 0.9806146572104019, "grad_norm": 5.936855316162109, "learning_rate": 2.2897816836014996e-08, "loss": 0.275, "step": 2074 }, { "epoch": 0.9810874704491725, "grad_norm": 10.341303825378418, "learning_rate": 2.179487267291891e-08, "loss": 0.3173, "step": 2075 }, { "epoch": 0.9815602836879432, "grad_norm": 6.050800323486328, "learning_rate": 2.071912259777875e-08, "loss": 0.2196, "step": 2076 }, { "epoch": 0.9820330969267139, "grad_norm": 5.055636882781982, "learning_rate": 1.967056954238933e-08, "loss": 0.181, "step": 2077 }, { "epoch": 0.9825059101654846, "grad_norm": 7.4767632484436035, "learning_rate": 1.864921636442252e-08, "loss": 0.201, "step": 2078 }, { "epoch": 0.9829787234042553, "grad_norm": 6.8587493896484375, "learning_rate": 1.7655065847423935e-08, "loss": 0.3132, "step": 2079 }, { "epoch": 0.983451536643026, "grad_norm": 7.90069580078125, "learning_rate": 1.6688120700798505e-08, "loss": 0.2018, "step": 2080 }, { "epoch": 0.983451536643026, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7175925925925926, "eval_loss": 0.29836517572402954, "eval_precision": 0.8659217877094972, "eval_recall": 0.6126482213438735, "eval_runtime": 48.9715, "eval_samples_per_second": 5.636, "eval_steps_per_second": 0.184, "step": 2080 }, { "epoch": 0.9839243498817967, "grad_norm": 6.26698637008667, "learning_rate": 1.5748383559809345e-08, "loss": 0.2399, "step": 2081 }, { "epoch": 0.9843971631205674, "grad_norm": 6.140974044799805, "learning_rate": 1.4835856985568887e-08, "loss": 0.2634, "step": 2082 }, { "epoch": 0.984869976359338, "grad_norm": 4.758864879608154, "learning_rate": 1.3950543465027777e-08, "loss": 0.2022, "step": 2083 }, { "epoch": 0.9853427895981087, "grad_norm": 6.061093330383301, "learning_rate": 1.3092445410977094e-08, "loss": 0.2611, "step": 2084 }, { "epoch": 0.9858156028368794, "grad_norm": 5.0369696617126465, "learning_rate": 1.2261565162030586e-08, "loss": 0.242, "step": 2085 }, { "epoch": 0.9862884160756501, "grad_norm": 4.759927272796631, "learning_rate": 1.1457904982627998e-08, "loss": 0.2424, "step": 2086 }, { "epoch": 0.9867612293144208, "grad_norm": 4.427268028259277, "learning_rate": 1.0681467063022866e-08, "loss": 0.1903, "step": 2087 }, { "epoch": 0.9872340425531915, "grad_norm": 5.498013496398926, "learning_rate": 9.932253519280289e-09, "loss": 0.1198, "step": 2088 }, { "epoch": 0.9877068557919622, "grad_norm": 5.413758754730225, "learning_rate": 9.210266393266942e-09, "loss": 0.231, "step": 2089 }, { "epoch": 0.9881796690307328, "grad_norm": 7.1858134269714355, "learning_rate": 8.515507652649968e-09, "loss": 0.26, "step": 2090 }, { "epoch": 0.9886524822695035, "grad_norm": 4.840980052947998, "learning_rate": 7.84797919089031e-09, "loss": 0.2581, "step": 2091 }, { "epoch": 0.9891252955082742, "grad_norm": 5.378105640411377, "learning_rate": 7.20768282723383e-09, "loss": 0.2107, "step": 2092 }, { "epoch": 0.9895981087470449, "grad_norm": 8.181370735168457, "learning_rate": 6.5946203067135395e-09, "loss": 0.2245, "step": 2093 }, { "epoch": 0.9900709219858156, "grad_norm": 5.936405181884766, "learning_rate": 6.008793300136262e-09, "loss": 0.1958, "step": 2094 }, { "epoch": 0.9905437352245863, "grad_norm": 6.984827995300293, "learning_rate": 5.450203404087084e-09, "loss": 0.2338, "step": 2095 }, { "epoch": 0.991016548463357, "grad_norm": 5.687265872955322, "learning_rate": 4.918852140916031e-09, "loss": 0.2498, "step": 2096 }, { "epoch": 0.9914893617021276, "grad_norm": 8.568177223205566, "learning_rate": 4.414740958742503e-09, "loss": 0.3252, "step": 2097 }, { "epoch": 0.9919621749408983, "grad_norm": 4.833063125610352, "learning_rate": 3.937871231444179e-09, "loss": 0.1798, "step": 2098 }, { "epoch": 0.992434988179669, "grad_norm": 4.7450056076049805, "learning_rate": 3.4882442586570143e-09, "loss": 0.1758, "step": 2099 }, { "epoch": 0.9929078014184397, "grad_norm": 5.54990291595459, "learning_rate": 3.0658612657730182e-09, "loss": 0.2018, "step": 2100 }, { "epoch": 0.9929078014184397, "eval_accuracy": 0.8647450110864745, "eval_f1": 0.7162790697674418, "eval_loss": 0.2974694073200226, "eval_precision": 0.8700564971751412, "eval_recall": 0.6086956521739131, "eval_runtime": 47.9735, "eval_samples_per_second": 5.753, "eval_steps_per_second": 0.188, "step": 2100 }, { "epoch": 0.9933806146572104, "grad_norm": 5.17624044418335, "learning_rate": 2.6707234039302642e-09, "loss": 0.2462, "step": 2101 }, { "epoch": 0.9938534278959811, "grad_norm": 6.951145648956299, "learning_rate": 2.302831750020662e-09, "loss": 0.2213, "step": 2102 }, { "epoch": 0.9943262411347518, "grad_norm": 4.699036598205566, "learning_rate": 1.962187306674412e-09, "loss": 0.2068, "step": 2103 }, { "epoch": 0.9947990543735225, "grad_norm": 5.011316776275635, "learning_rate": 1.6487910022666698e-09, "loss": 0.154, "step": 2104 }, { "epoch": 0.9952718676122931, "grad_norm": 5.612926483154297, "learning_rate": 1.3626436909131014e-09, "loss": 0.2245, "step": 2105 }, { "epoch": 0.9957446808510638, "grad_norm": 6.049012184143066, "learning_rate": 1.1037461524643355e-09, "loss": 0.2118, "step": 2106 }, { "epoch": 0.9962174940898345, "grad_norm": 6.17867374420166, "learning_rate": 8.720990925059625e-10, "loss": 0.281, "step": 2107 }, { "epoch": 0.9966903073286052, "grad_norm": 5.029500484466553, "learning_rate": 6.677031423574232e-10, "loss": 0.2281, "step": 2108 }, { "epoch": 0.9971631205673759, "grad_norm": 6.733471393585205, "learning_rate": 4.905588590686794e-10, "loss": 0.2776, "step": 2109 }, { "epoch": 0.9976359338061466, "grad_norm": 9.003266334533691, "learning_rate": 3.4066672541910317e-10, "loss": 0.3067, "step": 2110 }, { "epoch": 0.9981087470449173, "grad_norm": 6.541497230529785, "learning_rate": 2.180271499185871e-10, "loss": 0.2418, "step": 2111 }, { "epoch": 0.9985815602836879, "grad_norm": 3.8773562908172607, "learning_rate": 1.2264046680088294e-10, "loss": 0.1889, "step": 2112 }, { "epoch": 0.9990543735224586, "grad_norm": 5.770922660827637, "learning_rate": 5.4506936030263026e-11, "loss": 0.2226, "step": 2113 }, { "epoch": 0.9995271867612293, "grad_norm": 4.714475631713867, "learning_rate": 1.3626743291528244e-11, "loss": 0.1988, "step": 2114 }, { "epoch": 1.0, "grad_norm": 9.07297134399414, "learning_rate": 0.0, "loss": 0.2536, "step": 2115 } ], "logging_steps": 1, "max_steps": 2115, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.4142614901253734e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }